From b9e97c9257b8647660d5248a80d21aa7093d21d5 Mon Sep 17 00:00:00 2001 From: Dongyu Zhao Date: Wed, 16 Jul 2025 16:08:12 +0800 Subject: [PATCH] Split MarkdownLanguage types into separate files --- .../MarkdownLanguage+AutoLinkBuilder.swift | 24 + ...MarkdownLanguage+BareAutoLinkBuilder.swift | 42 + .../MarkdownLanguage+BlockQuoteBuilder.swift | 39 + .../MarkdownLanguage+CodeBlockBuilder.swift | 78 + .../Languages/MarkdownLanguage+Element.swift | 34 + .../MarkdownLanguage+EmphasisBuilder.swift | 29 + .../MarkdownLanguage+EntityBuilder.swift | 48 + .../MarkdownLanguage+FootnoteBuilder.swift | 59 + .../MarkdownLanguage+HTMLBlockBuilder.swift | 25 + .../MarkdownLanguage+HTMLBuilder.swift | 25 + .../MarkdownLanguage+HeadingBuilder.swift | 73 + .../MarkdownLanguage+ImageBuilder.swift | 37 + ...ownLanguage+IndentedCodeBlockBuilder.swift | 42 + .../MarkdownLanguage+InlineCodeBuilder.swift | 30 + .../MarkdownLanguage+LinkBuilder.swift | 56 + ...guage+LinkReferenceDefinitionBuilder.swift | 48 + .../MarkdownLanguage+OrderedListBuilder.swift | 122 ++ .../MarkdownLanguage+ParagraphBuilder.swift | 56 + ...arkdownLanguage+SetextHeadingBuilder.swift | 110 ++ ...arkdownLanguage+StrikethroughBuilder.swift | 30 + .../MarkdownLanguage+StrongBuilder.swift | 34 + .../MarkdownLanguage+TableBuilder.swift | 102 ++ ...arkdownLanguage+ThematicBreakBuilder.swift | 47 + .../Languages/MarkdownLanguage+Token.swift | 99 ++ .../MarkdownLanguage+Tokenizer.swift | 145 ++ ...arkdownLanguage+UnorderedListBuilder.swift | 121 ++ .../Languages/MarkdownLanguage.swift | 1451 ----------------- 27 files changed, 1555 insertions(+), 1451 deletions(-) create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+AutoLinkBuilder.swift create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+BareAutoLinkBuilder.swift create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+BlockQuoteBuilder.swift create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+CodeBlockBuilder.swift create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+Element.swift create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+EmphasisBuilder.swift create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+EntityBuilder.swift create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+FootnoteBuilder.swift create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+HTMLBlockBuilder.swift create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+HTMLBuilder.swift create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+HeadingBuilder.swift create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+ImageBuilder.swift create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+IndentedCodeBlockBuilder.swift create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+InlineCodeBuilder.swift create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+LinkBuilder.swift create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+LinkReferenceDefinitionBuilder.swift create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+OrderedListBuilder.swift create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+ParagraphBuilder.swift create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+SetextHeadingBuilder.swift create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+StrikethroughBuilder.swift create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+StrongBuilder.swift create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+TableBuilder.swift create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+ThematicBreakBuilder.swift create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+Token.swift create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+Tokenizer.swift create mode 100644 Sources/SwiftParser/Languages/MarkdownLanguage+UnorderedListBuilder.swift diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+AutoLinkBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+AutoLinkBuilder.swift new file mode 100644 index 0000000..e261549 --- /dev/null +++ b/Sources/SwiftParser/Languages/MarkdownLanguage+AutoLinkBuilder.swift @@ -0,0 +1,24 @@ +import Foundation + +extension MarkdownLanguage { + public class AutoLinkBuilder: CodeElementBuilder { + public init() {} + public func accept(context: CodeContext, token: any CodeToken) -> Bool { + guard let tok = token as? Token else { return false } + if case .lessThan = tok { return true } + return false + } + public func build(context: inout CodeContext) { + context.index += 1 + var text = "" + while context.index < context.tokens.count { + if let tok = context.tokens[context.index] as? Token { + if case .greaterThan = tok { context.index += 1; break } + else { text += tok.text; context.index += 1 } + } else { context.index += 1 } + } + context.currentNode.addChild(MarkdownAutoLinkNode(url: text)) + } + } + +} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+BareAutoLinkBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+BareAutoLinkBuilder.swift new file mode 100644 index 0000000..1e3ca17 --- /dev/null +++ b/Sources/SwiftParser/Languages/MarkdownLanguage+BareAutoLinkBuilder.swift @@ -0,0 +1,42 @@ +import Foundation + +extension MarkdownLanguage { + public class BareAutoLinkBuilder: CodeElementBuilder { + private static let regex: NSRegularExpression = { + let pattern = #"^((https?|ftp)://[^\s<>]+|www\.[^\s<>]+|[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,})"# + return try! NSRegularExpression(pattern: pattern, options: []) + }() + + public init() {} + + public func accept(context: CodeContext, token: any CodeToken) -> Bool { + guard let tok = token as? Token else { return false } + let start = tok.range.lowerBound + let text = String(context.input[start...]) + let range = NSRange(location: 0, length: text.utf16.count) + if let m = Self.regex.firstMatch(in: text, range: range), m.range.location == 0 { + return true + } + return false + } + + public func build(context: inout CodeContext) { + guard let tok = context.tokens[context.index] as? Token else { return } + let start = tok.range.lowerBound + let text = String(context.input[start...]) + let range = NSRange(location: 0, length: text.utf16.count) + guard let m = Self.regex.firstMatch(in: text, range: range) else { return } + let endPos = context.input.index(start, offsetBy: m.range.length) + let url = String(context.input[start.. Bool { + guard let tok = token as? Token else { return false } + if case .greaterThan = tok { + if context.index == 0 { return true } + if let prev = context.tokens[context.index - 1] as? Token, case .newline = prev { return true } + } + return false + } + public func build(context: inout CodeContext) { + context.index += 1 // skip '>' + var text = "" + while context.index < context.tokens.count { + if let tok = context.tokens[context.index] as? Token { + switch tok { + case .newline: + context.index += 1 + let node = MarkdownBlockQuoteNode(value: text.trimmingCharacters(in: .whitespaces)) + context.currentNode.addChild(node) + return + case .eof: + let node = MarkdownBlockQuoteNode(value: text.trimmingCharacters(in: .whitespaces)) + context.currentNode.addChild(node) + context.index += 1 + return + default: + text += tok.text + context.index += 1 + } + } else { context.index += 1 } + } + } + } + +} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+CodeBlockBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+CodeBlockBuilder.swift new file mode 100644 index 0000000..1ba03b7 --- /dev/null +++ b/Sources/SwiftParser/Languages/MarkdownLanguage+CodeBlockBuilder.swift @@ -0,0 +1,78 @@ +import Foundation + +extension MarkdownLanguage { + public class CodeBlockBuilder: CodeElementBuilder { + public init() {} + public func accept(context: CodeContext, token: any CodeToken) -> Bool { + guard let first = token as? Token else { return false } + let fenceKind: String + switch first { + case .backtick: fenceKind = "`" + case .tilde: fenceKind = "~" + default: return false + } + var idx = context.index + var count = 0 + while idx < context.tokens.count, let t = context.tokens[idx] as? Token, t.kindDescription == fenceKind { + count += 1; idx += 1 + } + guard count >= 3 else { return false } + if context.index == 0 { return true } + if let prev = context.tokens[context.index - 1] as? Token, case .newline = prev { + return true + } + return false + } + public func build(context: inout CodeContext) { + guard let startTok = context.tokens[context.index] as? Token else { return } + let fenceKind = startTok.kindDescription + var fenceLength = 0 + while context.index < context.tokens.count, let t = context.tokens[context.index] as? Token, t.kindDescription == fenceKind { + fenceLength += 1 + context.index += 1 + } + // capture info string until end of line and trim whitespace + var info = "" + while context.index < context.tokens.count { + if let tok = context.tokens[context.index] as? Token { + if case .newline = tok { + context.index += 1 + break + } else { + info += tok.text + context.index += 1 + } + } else { + context.index += 1 + } + } + info = info.trimmingCharacters(in: .whitespaces) + let lang = info.split(whereSeparator: { $0.isWhitespace }).first.map(String.init) + + let blockStart = context.index + var text = "" + while context.index < context.tokens.count { + if let tok = context.tokens[context.index] as? Token { + // check for closing fence at start of line + if tok.kindDescription == fenceKind && (context.index == blockStart || (context.index > blockStart && (context.tokens[context.index - 1] as? Token)?.kindDescription == "newline")) { + var idx = context.index + var count = 0 + while idx < context.tokens.count, let t = context.tokens[idx] as? Token, t.kindDescription == fenceKind { + count += 1; idx += 1 + } + if count >= fenceLength { + context.index = idx + if context.index < context.tokens.count, let nl = context.tokens[context.index] as? Token, case .newline = nl { context.index += 1 } + context.currentNode.addChild(MarkdownCodeBlockNode(lang: lang, content: text)) + return + } + } + text += tok.text + context.index += 1 + } else { context.index += 1 } + } + context.currentNode.addChild(MarkdownCodeBlockNode(lang: lang, content: text)) + } + } + +} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+Element.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+Element.swift new file mode 100644 index 0000000..21201f4 --- /dev/null +++ b/Sources/SwiftParser/Languages/MarkdownLanguage+Element.swift @@ -0,0 +1,34 @@ +import Foundation + +extension MarkdownLanguage { + public enum Element: String, CodeElement { + case root + case paragraph + case heading + case text + case listItem + case orderedListItem + case unorderedList + case orderedList + case emphasis + case strong + case codeBlock + case inlineCode + case link + case blockQuote + case thematicBreak + case image + case html + case entity + case strikethrough + case table + case tableHeader + case tableRow + case tableCell + case autoLink + case linkReferenceDefinition + case footnoteDefinition + case footnoteReference + } + +} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+EmphasisBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+EmphasisBuilder.swift new file mode 100644 index 0000000..5469f4d --- /dev/null +++ b/Sources/SwiftParser/Languages/MarkdownLanguage+EmphasisBuilder.swift @@ -0,0 +1,29 @@ +import Foundation + +extension MarkdownLanguage { + public class EmphasisBuilder: CodeElementBuilder { + public init() {} + public func accept(context: CodeContext, token: any CodeToken) -> Bool { + guard let tok = token as? Token else { return false } + if case .star = tok { return true } + if case .underscore = tok { return true } + return false + } + public func build(context: inout CodeContext) { + let snap = context.snapshot() + guard let open = context.tokens[context.index] as? Token else { return } + context.index += 1 + let (children, ok) = MarkdownLanguage.parseInline(context: &context, closing: open, count: 1) + if ok { + let node = MarkdownEmphasisNode(value: "") + children.forEach { node.addChild($0) } + context.currentNode.addChild(node) + } else { + context.restore(snap) + context.currentNode.addChild(MarkdownTextNode(value: open.text)) + context.index += 1 + } + } + } + +} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+EntityBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+EntityBuilder.swift new file mode 100644 index 0000000..3fbee5a --- /dev/null +++ b/Sources/SwiftParser/Languages/MarkdownLanguage+EntityBuilder.swift @@ -0,0 +1,48 @@ +import Foundation + +extension MarkdownLanguage { + public class EntityBuilder: CodeElementBuilder { + public init() {} + public func accept(context: CodeContext, token: any CodeToken) -> Bool { + guard let tok = token as? Token else { return false } + if case .ampersand = tok { return true } + return false + } + public func build(context: inout CodeContext) { + context.index += 1 + var text = "" + while context.index < context.tokens.count { + if let tok = context.tokens[context.index] as? Token { + if case .semicolon = tok { context.index += 1; break } + else { text += tok.text; context.index += 1 } + } else { context.index += 1 } + } + let decoded = decode(text) + context.currentNode.addChild(MarkdownEntityNode(value: decoded)) + } + + private func decode(_ entity: String) -> String { + switch entity { + case "amp": return "&" + case "lt": return "<" + case "gt": return ">" + case "quot": return "\"" + case "apos": return "'" + default: + if entity.hasPrefix("#x") || entity.hasPrefix("#X") { + let hex = entity.dropFirst(2) + if let value = UInt32(hex, radix: 16), let scalar = UnicodeScalar(value) { + return String(Character(scalar)) + } + } else if entity.hasPrefix("#") { + let num = entity.dropFirst() + if let value = UInt32(num), let scalar = UnicodeScalar(value) { + return String(Character(scalar)) + } + } + return "&" + entity + ";" + } + } + } + +} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+FootnoteBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+FootnoteBuilder.swift new file mode 100644 index 0000000..8c30c1b --- /dev/null +++ b/Sources/SwiftParser/Languages/MarkdownLanguage+FootnoteBuilder.swift @@ -0,0 +1,59 @@ +import Foundation + +extension MarkdownLanguage { + public class FootnoteBuilder: CodeElementBuilder { + public init() {} + public func accept(context: CodeContext, token: any CodeToken) -> Bool { + guard let lb = token as? Token, case .lbracket = lb else { return false } + guard context.index + 2 < context.tokens.count else { return false } + guard let first = context.tokens[context.index + 1] as? Token else { return false } + if case .text(let s, _) = first, s.starts(with: "^") { + var idx = context.index + 2 + while idx < context.tokens.count { + if let t = context.tokens[idx] as? Token { + if case .rbracket = t { return true } + if case .text = t { + idx += 1; continue + } + if case .number = t { + idx += 1; continue + } + } + break + } + } + return false + } + public func build(context: inout CodeContext) { + context.index += 1 // skip [ + var id = "" + while context.index < context.tokens.count { + guard let tok = context.tokens[context.index] as? Token else { context.index += 1; continue } + if case .rbracket = tok { break } + id += tok.text + context.index += 1 + } + if id.hasPrefix("^") { id.removeFirst() } + if context.index < context.tokens.count { context.index += 1 } // skip ] + + if context.index < context.tokens.count, + let colon = context.tokens[context.index] as? Token, + case .text(let s, _) = colon, + s.trimmingCharacters(in: .whitespaces).hasPrefix(":") { + var text = s + context.index += 1 + while context.index < context.tokens.count { + if let tok = context.tokens[context.index] as? Token { + if case .newline = tok { context.index += 1; break } + else { text += tok.text; context.index += 1 } + } else { context.index += 1 } + } + if text.hasPrefix(":") { text.removeFirst() } + let trimmed = text.trimmingCharacters(in: .whitespaces) + context.currentNode.addChild(MarkdownFootnoteDefinitionNode(identifier: id, text: trimmed)) + } else { + context.currentNode.addChild(MarkdownFootnoteReferenceNode(identifier: id)) + } + } + } +} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+HTMLBlockBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+HTMLBlockBuilder.swift new file mode 100644 index 0000000..b15c676 --- /dev/null +++ b/Sources/SwiftParser/Languages/MarkdownLanguage+HTMLBlockBuilder.swift @@ -0,0 +1,25 @@ +import Foundation + +extension MarkdownLanguage { + public class HTMLBlockBuilder: CodeElementBuilder { + public init() {} + public func accept(context: CodeContext, token: any CodeToken) -> Bool { + guard let tok = token as? Token else { return false } + if case .lessThan = tok, context.index == 0 { + let rest = String(context.input[tok.range.upperBound...]).lowercased() + return rest.hasPrefix("!doctype") || rest.hasPrefix("html") + } + return false + } + public func build(context: inout CodeContext) { + var text = "" + while context.index < context.tokens.count { + if let tok = context.tokens[context.index] as? Token { text += tok.text } + context.index += 1 + } + let closed = MarkdownLanguage.isHTMLClosed(text) + context.currentNode.addChild(MarkdownHtmlNode(value: text, closed: closed)) + } + } + +} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+HTMLBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+HTMLBuilder.swift new file mode 100644 index 0000000..f7c7202 --- /dev/null +++ b/Sources/SwiftParser/Languages/MarkdownLanguage+HTMLBuilder.swift @@ -0,0 +1,25 @@ +import Foundation + +extension MarkdownLanguage { + public class HTMLBuilder: CodeElementBuilder { + public init() {} + public func accept(context: CodeContext, token: any CodeToken) -> Bool { + guard let tok = token as? Token else { return false } + return tok.kindDescription == "<" + } + public func build(context: inout CodeContext) { + context.index += 1 // skip < + var text = "" + while context.index < context.tokens.count { + if let tok = context.tokens[context.index] as? Token { + if case .greaterThan = tok { context.index += 1; break } + else { text += tok.text; context.index += 1 } + } else { context.index += 1 } + } + let html = "<" + text + ">" + let closed = MarkdownLanguage.isHTMLClosed(html) + context.currentNode.addChild(MarkdownHtmlNode(value: text, closed: closed)) + } + } + +} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+HeadingBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+HeadingBuilder.swift new file mode 100644 index 0000000..41e1f91 --- /dev/null +++ b/Sources/SwiftParser/Languages/MarkdownLanguage+HeadingBuilder.swift @@ -0,0 +1,73 @@ +import Foundation + +extension MarkdownLanguage { + public class HeadingBuilder: CodeElementBuilder { + public init() {} + public func accept(context: CodeContext, token: any CodeToken) -> Bool { + guard let tok = token as? Token else { return false } + if case .hash = tok { + if context.index == 0 { return true } + if let prev = context.tokens[context.index - 1] as? Token, case .newline = prev { + return true + } + } + return false + } + public func build(context: inout CodeContext) { + var count = 0 + while context.index < context.tokens.count, + let tok = context.tokens[context.index] as? Token, + case .hash = tok, + count < 6 { + count += 1 + context.index += 1 + } + var tokens: [Token] = [] + while context.index < context.tokens.count { + guard let tok = context.tokens[context.index] as? Token else { context.index += 1; continue } + switch tok { + case .newline, .eof: + context.index += 1 + default: + tokens.append(tok) + context.index += 1 + } + if case .newline = tok { break } + if case .eof = tok { break } + } + + // Trim trailing whitespace + while let last = tokens.last, case .text(let s, _) = last, s.trimmingCharacters(in: .whitespaces).isEmpty { + tokens.removeLast() + } + // Remove trailing '#' sequences + while let last = tokens.last, case .hash = last { + tokens.removeLast() + while let l = tokens.last, case .text(let s, _) = l, s.trimmingCharacters(in: .whitespaces).isEmpty { + tokens.removeLast() + } + } + while let last = tokens.last, case .text(let s, _) = last, s.trimmingCharacters(in: .whitespaces).isEmpty { + tokens.removeLast() + } + + // Remove spaces before hard breaks + var processed: [Token] = [] + for tok in tokens { + if case .hardBreak = tok { + while let l = processed.last, case .text(let s, _) = l, s.allSatisfy({ $0 == " " }) { + processed.removeLast() + } + } + processed.append(tok) + } + + let trimmedValue = processed.map { $0.text }.joined().trimmingCharacters(in: .whitespaces) + let children = MarkdownLanguage.parseInlineTokens(processed, input: context.input) + let node = MarkdownHeadingNode(value: trimmedValue, level: count) + children.forEach { node.addChild($0) } + context.currentNode.addChild(node) + } + } + +} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+ImageBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+ImageBuilder.swift new file mode 100644 index 0000000..1be1791 --- /dev/null +++ b/Sources/SwiftParser/Languages/MarkdownLanguage+ImageBuilder.swift @@ -0,0 +1,37 @@ +import Foundation + +extension MarkdownLanguage { + public class ImageBuilder: CodeElementBuilder { + public init() {} + public func accept(context: CodeContext, token: any CodeToken) -> Bool { + guard let tok = token as? Token else { return false } + if case .exclamation = tok, + context.index + 1 < context.tokens.count, + let next = context.tokens[context.index + 1] as? Token, + case .lbracket = next { return true } + return false + } + public func build(context: inout CodeContext) { + context.index += 2 // skip ![ + var alt = "" + while context.index < context.tokens.count { + if let tok = context.tokens[context.index] as? Token { + if case .rbracket = tok { context.index += 1; break } + else { alt += tok.text; context.index += 1 } + } else { context.index += 1 } + } + var url = "" + if context.index < context.tokens.count, let lp = context.tokens[context.index] as? Token, case .lparen = lp { + context.index += 1 + while context.index < context.tokens.count { + if let tok = context.tokens[context.index] as? Token { + if case .rparen = tok { context.index += 1; break } + else { url += tok.text; context.index += 1 } + } else { context.index += 1 } + } + } + context.currentNode.addChild(MarkdownImageNode(alt: alt, url: url)) + } + } + +} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+IndentedCodeBlockBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+IndentedCodeBlockBuilder.swift new file mode 100644 index 0000000..e3e2e08 --- /dev/null +++ b/Sources/SwiftParser/Languages/MarkdownLanguage+IndentedCodeBlockBuilder.swift @@ -0,0 +1,42 @@ +import Foundation + +extension MarkdownLanguage { + public class IndentedCodeBlockBuilder: CodeElementBuilder { + public init() {} + public func accept(context: CodeContext, token: any CodeToken) -> Bool { + guard let tok = token as? Token else { return false } + if case .text(let s, _) = tok { + if (context.index == 0 || (context.tokens[context.index - 1] as? Token)?.kindDescription == "newline") && s.hasPrefix(" ") { + return true + } + } + return false + } + public func build(context: inout CodeContext) { + var text = "" + while context.index < context.tokens.count { + if let tok = context.tokens[context.index] as? Token { + switch tok { + case .newline: + context.index += 1 + if context.index < context.tokens.count, let next = context.tokens[context.index] as? Token, case .text(let s, _) = next, s.hasPrefix(" ") { + text += "\n" + String(s.dropFirst(4)) + context.index += 1 + } else { + context.currentNode.addChild(MarkdownCodeBlockNode(lang: nil, content: text)) + return + } + case .text(let s, _): + text += String(s.dropFirst(4)) + context.index += 1 + default: + text += tok.text + context.index += 1 + } + } else { context.index += 1 } + } + context.currentNode.addChild(MarkdownCodeBlockNode(lang: nil, content: text)) + } + } + +} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+InlineCodeBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+InlineCodeBuilder.swift new file mode 100644 index 0000000..12d29f4 --- /dev/null +++ b/Sources/SwiftParser/Languages/MarkdownLanguage+InlineCodeBuilder.swift @@ -0,0 +1,30 @@ +import Foundation + +extension MarkdownLanguage { + public class InlineCodeBuilder: CodeElementBuilder { + public init() {} + public func accept(context: CodeContext, token: any CodeToken) -> Bool { + guard let tok = token as? Token else { return false } + if case .backtick = tok { return true } + return false + } + public func build(context: inout CodeContext) { + context.index += 1 + var text = "" + while context.index < context.tokens.count { + if let tok = context.tokens[context.index] as? Token, case .backtick = tok { + context.index += 1 + let node = MarkdownInlineCodeNode(value: text) + context.currentNode.addChild(node) + return + } else if let tok = context.tokens[context.index] as? Token { + text += tok.text + context.index += 1 + } else { context.index += 1 } + } + let node = MarkdownInlineCodeNode(value: text) + context.currentNode.addChild(node) + } + } + +} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+LinkBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+LinkBuilder.swift new file mode 100644 index 0000000..3134385 --- /dev/null +++ b/Sources/SwiftParser/Languages/MarkdownLanguage+LinkBuilder.swift @@ -0,0 +1,56 @@ +import Foundation + +extension MarkdownLanguage { + public class LinkBuilder: CodeElementBuilder { + public init() {} + public func accept(context: CodeContext, token: any CodeToken) -> Bool { + guard let tok = token as? Token else { return false } + if case .lbracket = tok { return true } + return false + } + public func build(context: inout CodeContext) { + context.index += 1 + var textTokens: [Token] = [] + while context.index < context.tokens.count { + if let tok = context.tokens[context.index] as? Token { + if case .rbracket = tok { + context.index += 1 + break + } else { + textTokens.append(tok) + context.index += 1 + } + } else { context.index += 1 } + } + let textNodes = MarkdownLanguage.parseInlineTokens(textTokens, input: context.input) + var url = "" + if context.index < context.tokens.count, let lparen = context.tokens[context.index] as? Token, case .lparen = lparen { + context.index += 1 + while context.index < context.tokens.count { + if let tok = context.tokens[context.index] as? Token { + if case .rparen = tok { + context.index += 1 + break + } else { + url += tok.text + context.index += 1 + } + } else { context.index += 1 } + } + } else if context.index + 2 < context.tokens.count, + let lb = context.tokens[context.index] as? Token, case .lbracket = lb, + let idTok = context.tokens[context.index + 1] as? Token, + let rb = context.tokens[context.index + 2] as? Token, case .rbracket = rb, + case .text(let id, _) = idTok { + context.index += 3 + let key = id.trimmingCharacters(in: .whitespaces).lowercased() + if let ref = context.linkReferences[key] { + url = ref + } + } + let node = MarkdownLinkNode(text: textNodes, url: url) + context.currentNode.addChild(node) + } + } + +} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+LinkReferenceDefinitionBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+LinkReferenceDefinitionBuilder.swift new file mode 100644 index 0000000..605fc62 --- /dev/null +++ b/Sources/SwiftParser/Languages/MarkdownLanguage+LinkReferenceDefinitionBuilder.swift @@ -0,0 +1,48 @@ +import Foundation + +extension MarkdownLanguage { + public class LinkReferenceDefinitionBuilder: CodeElementBuilder { + public init() {} + public func accept(context: CodeContext, token: any CodeToken) -> Bool { + guard context.index + 3 < context.tokens.count else { return false } + guard let lb = token as? Token, + let txt = context.tokens[context.index + 1] as? Token, + let rb = context.tokens[context.index + 2] as? Token, + let colon = context.tokens[context.index + 3] as? Token else { return false } + if case .lbracket = lb, + case .text = txt, + case .rbracket = rb, + case .text(let s, _) = colon, + s.trimmingCharacters(in: .whitespaces).hasPrefix(":") { + return true + } + return false + } + public func build(context: inout CodeContext) { + context.index += 1 + var id = "" + if context.index < context.tokens.count, let idTok = context.tokens[context.index] as? Token, case .text(let s, _) = idTok { + id = s + context.index += 1 + } + if context.index < context.tokens.count { context.index += 1 } // skip ] + var text = "" + if context.index < context.tokens.count, let colon = context.tokens[context.index] as? Token, case .text(let s, _) = colon { + text = s + context.index += 1 + } + while context.index < context.tokens.count { + if let tok = context.tokens[context.index] as? Token { + if case .newline = tok { context.index += 1; break } + else { text += tok.text; context.index += 1 } + } else { context.index += 1 } + } + var url = text.trimmingCharacters(in: .whitespaces) + if url.hasPrefix(":") { url.removeFirst() } + url = url.trimmingCharacters(in: .whitespaces) + let trimmedID = id.trimmingCharacters(in: .whitespaces) + context.linkReferences[trimmedID.lowercased()] = url + context.currentNode.addChild(MarkdownLinkReferenceDefinitionNode(identifier: trimmedID, url: url)) + } + } +} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+OrderedListBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+OrderedListBuilder.swift new file mode 100644 index 0000000..3323aed --- /dev/null +++ b/Sources/SwiftParser/Languages/MarkdownLanguage+OrderedListBuilder.swift @@ -0,0 +1,122 @@ +import Foundation + +extension MarkdownLanguage { + public class OrderedListBuilder: CodeElementBuilder { + public init() {} + + private func lineIndent(before idx: Int, in context: CodeContext) -> Int? { + if idx == 0 { return 0 } + var i = idx - 1 + var count = 0 + while i >= 0 { + guard let tok = context.tokens[i] as? Token else { return nil } + switch tok { + case .newline: + return count + case .text(let s, _) where s.allSatisfy({ $0 == " " }): + count += s.count + i -= 1 + default: + return nil + } + } + return count + } + + public func accept(context: CodeContext, token: any CodeToken) -> Bool { + guard let tok = token as? Token, case .number = tok else { return false } + guard context.index + 1 < context.tokens.count, + let dot = context.tokens[context.index + 1] as? Token, case .dot = dot else { return false } + if let _ = lineIndent(before: context.index, in: context) { return true } + return false + } + + public func build(context: inout CodeContext) { + func parseList(_ indent: Int, _ depth: Int) -> CodeNode { + let list = MarkdownOrderedListNode(value: "", level: depth) + var isLoose = false + while context.index < context.tokens.count { + guard context.index + 1 < context.tokens.count, + let num = context.tokens[context.index] as? Token, case .number = num, + let dot = context.tokens[context.index + 1] as? Token, case .dot = dot, + lineIndent(before: context.index, in: context) == indent else { break } + let (node, loose) = parseItem(indent, depth) + if loose { isLoose = true } + list.addChild(node) + } + list.value = isLoose ? "loose" : "tight" + return list + } + + func parseItem(_ indent: Int, _ depth: Int) -> (CodeNode, Bool) { + var loose = false + context.index += 2 + if context.index < context.tokens.count, + let t = context.tokens[context.index] as? Token, + case .text(let s, _) = t, s.first?.isWhitespace == true { + context.index += 1 + } + + let node = MarkdownOrderedListItemNode(value: "") + var text = "" + itemLoop: while context.index < context.tokens.count { + guard let tok = context.tokens[context.index] as? Token else { context.index += 1; continue } + switch tok { + case .newline: + context.index += 1 + if context.index < context.tokens.count, let nl = context.tokens[context.index] as? Token, case .newline = nl { + loose = true + context.index += 1 + } + let start = context.index + var spaces = 0 + if start < context.tokens.count, let sTok = context.tokens[start] as? Token, case .text(let s, _) = sTok, s.allSatisfy({ $0 == " " }) { + spaces = s.count + context.index += 1 + } + if context.index + 1 < context.tokens.count, + let nextNum = context.tokens[context.index] as? Token, case .number = nextNum, + let dot = context.tokens[context.index + 1] as? Token, case .dot = dot, + spaces > indent { + let sub = parseList(spaces, depth + 1) + node.addChild(sub) + if context.index + 1 < context.tokens.count, + let nextBullet = context.tokens[context.index] as? Token, case .number = nextBullet, + let ndot = context.tokens[context.index + 1] as? Token, case .dot = ndot, + (lineIndent(before: context.index, in: context) ?? 0) <= indent { + break itemLoop + } + } else if context.index + 1 < context.tokens.count, + let nextNum = context.tokens[context.index] as? Token, case .number = nextNum, + let dot = context.tokens[context.index + 1] as? Token, case .dot = dot, + spaces == indent { + context.index = start + break itemLoop + } else if spaces > indent { + text += "\n" + } else if spaces < indent { + context.index = start + break itemLoop + } else { + text += "\n" + } + case .eof: + context.index += 1 + break itemLoop + default: + text += tok.text + context.index += 1 + } + } + node.value = text.trimmingCharacters(in: .whitespaces) + return (node, loose) + } + + if let ind = lineIndent(before: context.index, in: context) { + let list = parseList(ind, 1) + context.currentNode.addChild(list) + } + } + } + +} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+ParagraphBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+ParagraphBuilder.swift new file mode 100644 index 0000000..3862147 --- /dev/null +++ b/Sources/SwiftParser/Languages/MarkdownLanguage+ParagraphBuilder.swift @@ -0,0 +1,56 @@ +import Foundation + +extension MarkdownLanguage { + public class ParagraphBuilder: CodeElementBuilder { + public init() {} + public func accept(context: CodeContext, token: any CodeToken) -> Bool { + if token is Token { return true } else { return false } + } + public func build(context: inout CodeContext) { + var tokens: [Token] = [] + var ended = false + while context.index < context.tokens.count { + guard let tok = context.tokens[context.index] as? Token else { context.index += 1; continue } + switch tok { + case .text, .star, .underscore, .backtick: + tokens.append(tok) + context.index += 1 + case .hardBreak: + while let last = tokens.last, case .text(let s, _) = last, s.allSatisfy({ $0 == " " }) { + tokens.removeLast() + } + tokens.append(tok) + context.index += 1 + case .newline: + context.index += 1 + ended = true + case .dash, .hash, .plus, .lbracket, + .greaterThan, .exclamation, .tilde, .equal, .lessThan, .ampersand, .semicolon, .pipe: + ended = true + case .number: + if context.index + 1 < context.tokens.count, + let dot = context.tokens[context.index + 1] as? Token, + case .dot = dot { + ended = true + } else { + tokens.append(tok) + context.index += 1 + } + case .eof: + context.index += 1 + ended = true + case .dot, .rbracket, .lparen, .rparen: + tokens.append(tok) + context.index += 1 + } + if ended { break } + } + + let value = tokens.map { $0.text }.joined() + let children = MarkdownLanguage.parseInlineTokens(tokens, input: context.input) + let node = MarkdownParagraphNode(value: value) + children.forEach { node.addChild($0) } + context.currentNode.addChild(node) + } + } +} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+SetextHeadingBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+SetextHeadingBuilder.swift new file mode 100644 index 0000000..bbdb8ef --- /dev/null +++ b/Sources/SwiftParser/Languages/MarkdownLanguage+SetextHeadingBuilder.swift @@ -0,0 +1,110 @@ +import Foundation + +extension MarkdownLanguage { + public class SetextHeadingBuilder: CodeElementBuilder { + public init() {} + public func accept(context: CodeContext, token: any CodeToken) -> Bool { + guard token is Token else { return false } + if context.index > 0 { + if let prev = context.tokens[context.index - 1] as? Token, case .newline = prev { + // ok + } else if context.index != 0 { + return false + } + } + + var idx = context.index + var sawText = false + while idx < context.tokens.count { + guard let t = context.tokens[idx] as? Token else { return false } + if case .newline = t { break } + if case .eof = t { return false } + sawText = true + idx += 1 + } + guard sawText else { return false } + guard idx < context.tokens.count, let nl = context.tokens[idx] as? Token, case .newline = nl else { return false } + idx += 1 + guard idx < context.tokens.count else { return false } + + var kind: Token? + var count = 0 + while idx < context.tokens.count { + guard let tok = context.tokens[idx] as? Token else { return false } + switch tok { + case .dash: + if kind == nil { kind = tok } + if case .dash = kind! { count += 1; idx += 1 } else { return false } + case .equal: + if kind == nil { kind = tok } + if case .equal = kind! { count += 1; idx += 1 } else { return false } + case .text(let s, _): + if s.trimmingCharacters(in: .whitespaces).isEmpty { idx += 1 } else { return false } + case .newline, .eof: + break + default: + return false + } + if idx < context.tokens.count, let next = context.tokens[idx] as? Token { + if case .newline = next { break } + if case .eof = next { break } + } + } + if count == 0 { return false } + if idx < context.tokens.count, let endTok = context.tokens[idx] as? Token { + switch endTok { + case .newline, .eof: + return true + default: + return false + } + } + return false + } + public func build(context: inout CodeContext) { + var text = "" + while context.index < context.tokens.count { + if let tok = context.tokens[context.index] as? Token { + if case .newline = tok { + context.index += 1 + break + } else { + text += tok.text + context.index += 1 + } + } else { context.index += 1 } + } + var level: Int? + while context.index < context.tokens.count { + if let tok = context.tokens[context.index] as? Token { + switch tok { + case .dash: + if level == nil { level = 2 } + context.index += 1 + case .equal: + if level == nil { level = 1 } + context.index += 1 + case .text(let s, _) where s.trimmingCharacters(in: .whitespaces).isEmpty: + context.index += 1 + case .newline: + context.index += 1 + let node = MarkdownHeadingNode(value: text.trimmingCharacters(in: .whitespaces), level: level ?? 1) + context.currentNode.addChild(node) + return + case .eof: + context.index += 1 + let node = MarkdownHeadingNode(value: text.trimmingCharacters(in: .whitespaces), level: level ?? 1) + context.currentNode.addChild(node) + return + default: + context.index += 1 + } + } else { context.index += 1 } + } + context.currentNode.addChild(MarkdownHeadingNode(value: text.trimmingCharacters(in: .whitespaces), level: level ?? 1)) + } + } + + // MARK: - List Parsing + +} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+StrikethroughBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+StrikethroughBuilder.swift new file mode 100644 index 0000000..b6f66a1 --- /dev/null +++ b/Sources/SwiftParser/Languages/MarkdownLanguage+StrikethroughBuilder.swift @@ -0,0 +1,30 @@ +import Foundation + +extension MarkdownLanguage { + public class StrikethroughBuilder: CodeElementBuilder { + public init() {} + public func accept(context: CodeContext, token: any CodeToken) -> Bool { + guard context.index + 1 < context.tokens.count else { return false } + guard let t1 = token as? Token, let t2 = context.tokens[context.index + 1] as? Token else { return false } + return t1.kindDescription == "~" && t2.kindDescription == "~" + } + public func build(context: inout CodeContext) { + context.index += 2 + var text = "" + while context.index + 1 < context.tokens.count { + if let t1 = context.tokens[context.index] as? Token, + let t2 = context.tokens[context.index + 1] as? Token, + t1.kindDescription == "~" && t2.kindDescription == "~" { + context.index += 2 + context.currentNode.addChild(MarkdownStrikethroughNode(value: text)) + return + } else if let tok = context.tokens[context.index] as? Token { + text += tok.text + context.index += 1 + } else { context.index += 1 } + } + context.currentNode.addChild(MarkdownStrikethroughNode(value: text)) + } + } + +} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+StrongBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+StrongBuilder.swift new file mode 100644 index 0000000..cb12cd1 --- /dev/null +++ b/Sources/SwiftParser/Languages/MarkdownLanguage+StrongBuilder.swift @@ -0,0 +1,34 @@ +import Foundation + +extension MarkdownLanguage { + public class StrongBuilder: CodeElementBuilder { + public init() {} + public func accept(context: CodeContext, token: any CodeToken) -> Bool { + guard context.index + 1 < context.tokens.count else { return false } + guard let t1 = token as? Token, + let t2 = context.tokens[context.index + 1] as? Token else { return false } + switch (t1, t2) { + case (.star, .star), (.underscore, .underscore): + return true + default: + return false + } + } + public func build(context: inout CodeContext) { + let snap = context.snapshot() + guard let open = context.tokens[context.index] as? Token else { return } + context.index += 2 + let (children, ok) = MarkdownLanguage.parseInline(context: &context, closing: open, count: 2) + if ok { + let node = MarkdownStrongNode(value: "") + children.forEach { node.addChild($0) } + context.currentNode.addChild(node) + } else { + context.restore(snap) + context.currentNode.addChild(MarkdownTextNode(value: open.text + open.text)) + context.index += 2 + } + } + } + +} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+TableBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+TableBuilder.swift new file mode 100644 index 0000000..5b4e34c --- /dev/null +++ b/Sources/SwiftParser/Languages/MarkdownLanguage+TableBuilder.swift @@ -0,0 +1,102 @@ +import Foundation + +extension MarkdownLanguage { + public class TableBuilder: CodeElementBuilder { + public init() {} + public func accept(context: CodeContext, token: any CodeToken) -> Bool { + guard let tok = token as? Token else { return false } + if case .pipe = tok { + if context.index == 0 { return true } + if let prev = context.tokens[context.index - 1] as? Token, case .newline = prev { return true } + } + return false + } + func parseRow(_ context: inout CodeContext) -> [String] { + var cells: [String] = [] + var cell = "" + context.index += 1 // skip leading pipe + while context.index < context.tokens.count { + if let tok = context.tokens[context.index] as? Token { + switch tok { + case .pipe: + cells.append(cell.trimmingCharacters(in: .whitespaces)) + cell = "" + context.index += 1 + case .newline, .eof: + cells.append(cell.trimmingCharacters(in: .whitespaces)) + if let last = cells.last, last.isEmpty { cells.removeLast() } + context.index += 1 + return cells + default: + cell += tok.text + context.index += 1 + } + } else { + context.index += 1 + } + } + if !cell.isEmpty || !cells.isEmpty { + cells.append(cell.trimmingCharacters(in: .whitespaces)) + } + return cells + } + + func parseDelimiter(_ context: inout CodeContext) -> [String]? { + guard context.index < context.tokens.count, + let first = context.tokens[context.index] as? Token, + case .pipe = first else { return nil } + var snapshot = context.snapshot() + let cells = parseRow(&context) + for cell in cells { + var trimmed = cell.trimmingCharacters(in: .whitespaces) + if trimmed.hasPrefix(":") { trimmed.removeFirst() } + if trimmed.hasSuffix(":") { trimmed.removeLast() } + if trimmed.count < 3 { context.restore(snapshot); return nil } + if !trimmed.allSatisfy({ $0 == "-" }) { + context.restore(snapshot); return nil + } + } + return cells + } + + public func build(context: inout CodeContext) { + var ctx = context + let header = parseRow(&ctx) + let startIndex = ctx.index + if let _ = parseDelimiter(&ctx) { + var rows: [[String]] = [] + while ctx.index < ctx.tokens.count, + let tok = ctx.tokens[ctx.index] as? Token, + case .pipe = tok { + rows.append(parseRow(&ctx)) + } + + let table = MarkdownTableNode() + let headerNode = MarkdownTableHeaderNode() + for cell in header { + let cellNode = MarkdownTableCellNode() + cellNode.addChild(MarkdownTextNode(value: cell)) + headerNode.addChild(cellNode) + } + table.addChild(headerNode) + + for row in rows { + let rowNode = MarkdownTableRowNode() + for cell in row { + let cellNode = MarkdownTableCellNode() + cellNode.addChild(MarkdownTextNode(value: cell)) + rowNode.addChild(cellNode) + } + table.addChild(rowNode) + } + + context = ctx + context.currentNode.addChild(table) + } else { + context.index = startIndex + context.currentNode.addChild(MarkdownTableNode(value: header.joined(separator: "|"))) + } + } + } + +} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+ThematicBreakBuilder.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+ThematicBreakBuilder.swift new file mode 100644 index 0000000..450046c --- /dev/null +++ b/Sources/SwiftParser/Languages/MarkdownLanguage+ThematicBreakBuilder.swift @@ -0,0 +1,47 @@ +import Foundation + +extension MarkdownLanguage { + public class ThematicBreakBuilder: CodeElementBuilder { + public init() {} + public func accept(context: CodeContext, token: any CodeToken) -> Bool { + guard let tok = token as? Token else { return false } + switch tok { + case .dash, .star, .underscore: + if context.index == 0 || (context.index > 0 && (context.tokens[context.index - 1] as? Token) is Token && (context.tokens[context.index - 1] as? Token)?.kindDescription == "newline") { + var count = 0 + var idx = context.index + while idx < context.tokens.count, let t = context.tokens[idx] as? Token, t.kindDescription == tok.kindDescription { + count += 1; idx += 1 + } + if count >= 3 { + return true + } + } + default: + break + } + return false + } + public func build(context: inout CodeContext) { + if context.index < context.tokens.count, + let tok = context.tokens[context.index] as? Token { + let kind = tok.kindDescription + while context.index < context.tokens.count { + if let t = context.tokens[context.index] as? Token, + t.kindDescription == kind { + context.index += 1 + } else { + break + } + } + } + if context.index < context.tokens.count, + let nl = context.tokens[context.index] as? Token, + case .newline = nl { + context.index += 1 + } + context.currentNode.addChild(MarkdownThematicBreakNode(value: "")) + } + } + +} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+Token.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+Token.swift new file mode 100644 index 0000000..0dbb012 --- /dev/null +++ b/Sources/SwiftParser/Languages/MarkdownLanguage+Token.swift @@ -0,0 +1,99 @@ +import Foundation + +extension MarkdownLanguage { + public enum Token: CodeToken { + case text(String, Range) + case hash(Range) + case dash(Range) + case star(Range) + case underscore(Range) + case plus(Range) + case backtick(Range) + case greaterThan(Range) + case exclamation(Range) + case tilde(Range) + case equal(Range) + case lessThan(Range) + case ampersand(Range) + case semicolon(Range) + case pipe(Range) + case lbracket(Range) + case rbracket(Range) + case lparen(Range) + case rparen(Range) + case dot(Range) + case number(String, Range) + case hardBreak(Range) + case newline(Range) + case eof(Range) + + public var kindDescription: String { + switch self { + case .text: return "text" + case .hash: return "#" + case .dash: return "-" + case .star: return "*" + case .underscore: return "_" + case .plus: return "+" + case .backtick: return "`" + case .greaterThan: return ">" + case .exclamation: return "!" + case .tilde: return "~" + case .equal: return "=" + case .lessThan: return "<" + case .ampersand: return "&" + case .semicolon: return ";" + case .pipe: return "|" + case .lbracket: return "[" + case .rbracket: return "]" + case .lparen: return "(" + case .rparen: return ")" + case .dot: return "." + case .number: return "number" + case .hardBreak: return "hardBreak" + case .newline: return "newline" + case .eof: return "eof" + } + } + + public var text: String { + switch self { + case .text(let s, _): return s + case .hash: return "#" + case .dash: return "-" + case .star: return "*" + case .underscore: return "_" + case .plus: return "+" + case .backtick: return "`" + case .greaterThan: return ">" + case .exclamation: return "!" + case .tilde: return "~" + case .equal: return "=" + case .lessThan: return "<" + case .ampersand: return "&" + case .semicolon: return ";" + case .pipe: return "|" + case .lbracket: return "[" + case .rbracket: return "]" + case .lparen: return "(" + case .rparen: return ")" + case .dot: return "." + case .number(let s, _): return s + case .hardBreak, .newline: return "\n" + case .eof: return "" + } + } + + public var range: Range { + switch self { + case .text(_, let r), .hash(let r), .dash(let r), .star(let r), .underscore(let r), + .plus(let r), .backtick(let r), .greaterThan(let r), .exclamation(let r), .tilde(let r), + .equal(let r), .lessThan(let r), .ampersand(let r), .semicolon(let r), .pipe(let r), + .lbracket(let r), .rbracket(let r), .lparen(let r), .rparen(let r), .dot(let r), + .number(_, let r), .hardBreak(let r), .newline(let r), .eof(let r): + return r + } + } + } + +} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage+Tokenizer.swift b/Sources/SwiftParser/Languages/MarkdownLanguage+Tokenizer.swift new file mode 100644 index 0000000..492fe9a --- /dev/null +++ b/Sources/SwiftParser/Languages/MarkdownLanguage+Tokenizer.swift @@ -0,0 +1,145 @@ +import Foundation + +extension MarkdownLanguage { + public class Tokenizer: CodeTokenizer { + public init() {} + + public func tokenize(_ input: String) -> [any CodeToken] { + var tokens: [Token] = [] + var index = input.startIndex + func advance() { index = input.index(after: index) } + func add(_ t: Token) { tokens.append(t) } + while index < input.endIndex { + let ch = input[index] + if ch == "\\" { + let start = index + advance() + if index < input.endIndex { + let escaped = input[index] + advance() + add(.text(String(escaped), start.." { + let start = index + advance() + add(.greaterThan(start.. input.startIndex { + var i = input.index(before: start) + var spaceCount = 0 + while input[i] == " " { + spaceCount += 1 + if i == input.startIndex { break } + i = input.index(before: i) + } + if spaceCount >= 2 { + isHard = true + } else if spaceCount == 0 && input[i] == "\\" { + isHard = true + } + } + advance() + if isHard { + add(.hardBreak(start..!~|;&=\\".contains(input[index]) && + !input[index].isNumber { + advance() + } + let text = String(input[start.. Int? { + if idx == 0 { return 0 } + var i = idx - 1 + var count = 0 + while i >= 0 { + guard let tok = context.tokens[i] as? Token else { return nil } + switch tok { + case .newline: + return count + case .text(let s, _) where s.allSatisfy({ $0 == " " }): + count += s.count + i -= 1 + default: + return nil + } + } + return count + } + + private func isBullet(_ tok: Token) -> Bool { + switch tok { + case .dash, .star, .plus: return true + default: return false + } + } + + public func accept(context: CodeContext, token: any CodeToken) -> Bool { + guard let tok = token as? Token, isBullet(tok) else { return false } + guard context.index + 1 < context.tokens.count, + let next = context.tokens[context.index + 1] as? Token, + case .text(let s, _) = next, s.first?.isWhitespace == true else { + return false + } + if let ind = lineIndent(before: context.index, in: context) { return ind >= 0 } else { return false } + } + + public func build(context: inout CodeContext) { + func parseList(_ indent: Int, _ depth: Int) -> CodeNode { + let list = MarkdownUnorderedListNode(value: "", level: depth) + var isLoose = false + while context.index < context.tokens.count { + guard let bullet = context.tokens[context.index] as? Token, isBullet(bullet), lineIndent(before: context.index, in: context) == indent else { break } + let (node, loose) = parseItem(indent, depth) + if loose { isLoose = true } + list.addChild(node) + } + list.value = isLoose ? "loose" : "tight" + return list + } + + func parseItem(_ indent: Int, _ depth: Int) -> (CodeNode, Bool) { + var loose = false + // skip bullet and following whitespace + context.index += 1 + if context.index < context.tokens.count, + let t = context.tokens[context.index] as? Token, + case .text(let s, _) = t, s.first?.isWhitespace == true { + context.index += 1 + } + + let node = MarkdownListItemNode(value: "") + var text = "" + itemLoop: while context.index < context.tokens.count { + guard let tok = context.tokens[context.index] as? Token else { context.index += 1; continue } + switch tok { + case .newline: + context.index += 1 + // Check for blank line + if context.index < context.tokens.count, let nl = context.tokens[context.index] as? Token, case .newline = nl { + loose = true + context.index += 1 + } + let start = context.index + var spaces = 0 + if start < context.tokens.count, let sTok = context.tokens[start] as? Token, case .text(let s, _) = sTok, s.allSatisfy({ $0 == " " }) { + spaces = s.count + context.index += 1 + } + if context.index < context.tokens.count, let next = context.tokens[context.index] as? Token, isBullet(next), spaces > indent { + let sub = parseList(spaces, depth + 1) + node.addChild(sub) + if context.index < context.tokens.count, let nextTok = context.tokens[context.index] as? Token, isBullet(nextTok), (lineIndent(before: context.index, in: context) ?? 0) <= indent { + break itemLoop + } + } else if context.index < context.tokens.count, let next = context.tokens[context.index] as? Token, isBullet(next), spaces == indent { + context.index = start + break itemLoop + } else if spaces > indent { + text += "\n" + } else if spaces < indent { + context.index = start + break itemLoop + } else { + text += "\n" + } + case .eof: + context.index += 1 + break itemLoop + default: + text += tok.text + context.index += 1 + } + } + node.value = text.trimmingCharacters(in: .whitespaces) + return (node, loose) + } + + if let ind = lineIndent(before: context.index, in: context) { + let list = parseList(ind, 1) + context.currentNode.addChild(list) + } + } + } + +} diff --git a/Sources/SwiftParser/Languages/MarkdownLanguage.swift b/Sources/SwiftParser/Languages/MarkdownLanguage.swift index 9b8e603..11bd948 100644 --- a/Sources/SwiftParser/Languages/MarkdownLanguage.swift +++ b/Sources/SwiftParser/Languages/MarkdownLanguage.swift @@ -1,1274 +1,8 @@ import Foundation public struct MarkdownLanguage: CodeLanguage { - public enum Element: String, CodeElement { - case root - case paragraph - case heading - case text - case listItem - case orderedListItem - case unorderedList - case orderedList - case emphasis - case strong - case codeBlock - case inlineCode - case link - case blockQuote - case thematicBreak - case image - case html - case entity - case strikethrough - case table - case tableHeader - case tableRow - case tableCell - case autoLink - case linkReferenceDefinition - case footnoteDefinition - case footnoteReference - } - - public enum Token: CodeToken { - case text(String, Range) - case hash(Range) - case dash(Range) - case star(Range) - case underscore(Range) - case plus(Range) - case backtick(Range) - case greaterThan(Range) - case exclamation(Range) - case tilde(Range) - case equal(Range) - case lessThan(Range) - case ampersand(Range) - case semicolon(Range) - case pipe(Range) - case lbracket(Range) - case rbracket(Range) - case lparen(Range) - case rparen(Range) - case dot(Range) - case number(String, Range) - case hardBreak(Range) - case newline(Range) - case eof(Range) - - public var kindDescription: String { - switch self { - case .text: return "text" - case .hash: return "#" - case .dash: return "-" - case .star: return "*" - case .underscore: return "_" - case .plus: return "+" - case .backtick: return "`" - case .greaterThan: return ">" - case .exclamation: return "!" - case .tilde: return "~" - case .equal: return "=" - case .lessThan: return "<" - case .ampersand: return "&" - case .semicolon: return ";" - case .pipe: return "|" - case .lbracket: return "[" - case .rbracket: return "]" - case .lparen: return "(" - case .rparen: return ")" - case .dot: return "." - case .number: return "number" - case .hardBreak: return "hardBreak" - case .newline: return "newline" - case .eof: return "eof" - } - } - - public var text: String { - switch self { - case .text(let s, _): return s - case .hash: return "#" - case .dash: return "-" - case .star: return "*" - case .underscore: return "_" - case .plus: return "+" - case .backtick: return "`" - case .greaterThan: return ">" - case .exclamation: return "!" - case .tilde: return "~" - case .equal: return "=" - case .lessThan: return "<" - case .ampersand: return "&" - case .semicolon: return ";" - case .pipe: return "|" - case .lbracket: return "[" - case .rbracket: return "]" - case .lparen: return "(" - case .rparen: return ")" - case .dot: return "." - case .number(let s, _): return s - case .hardBreak, .newline: return "\n" - case .eof: return "" - } - } - - public var range: Range { - switch self { - case .text(_, let r), .hash(let r), .dash(let r), .star(let r), .underscore(let r), - .plus(let r), .backtick(let r), .greaterThan(let r), .exclamation(let r), .tilde(let r), - .equal(let r), .lessThan(let r), .ampersand(let r), .semicolon(let r), .pipe(let r), - .lbracket(let r), .rbracket(let r), .lparen(let r), .rparen(let r), .dot(let r), - .number(_, let r), .hardBreak(let r), .newline(let r), .eof(let r): - return r - } - } - } - - public class Tokenizer: CodeTokenizer { - public init() {} - - public func tokenize(_ input: String) -> [any CodeToken] { - var tokens: [Token] = [] - var index = input.startIndex - func advance() { index = input.index(after: index) } - func add(_ t: Token) { tokens.append(t) } - while index < input.endIndex { - let ch = input[index] - if ch == "\\" { - let start = index - advance() - if index < input.endIndex { - let escaped = input[index] - advance() - add(.text(String(escaped), start.." { - let start = index - advance() - add(.greaterThan(start.. input.startIndex { - var i = input.index(before: start) - var spaceCount = 0 - while input[i] == " " { - spaceCount += 1 - if i == input.startIndex { break } - i = input.index(before: i) - } - if spaceCount >= 2 { - isHard = true - } else if spaceCount == 0 && input[i] == "\\" { - isHard = true - } - } - advance() - if isHard { - add(.hardBreak(start..!~|;&=\\".contains(input[index]) && - !input[index].isNumber { - advance() - } - let text = String(input[start.. Bool { - guard let tok = token as? Token else { return false } - if case .hash = tok { - if context.index == 0 { return true } - if let prev = context.tokens[context.index - 1] as? Token, case .newline = prev { - return true - } - } - return false - } - public func build(context: inout CodeContext) { - var count = 0 - while context.index < context.tokens.count, - let tok = context.tokens[context.index] as? Token, - case .hash = tok, - count < 6 { - count += 1 - context.index += 1 - } - var tokens: [Token] = [] - while context.index < context.tokens.count { - guard let tok = context.tokens[context.index] as? Token else { context.index += 1; continue } - switch tok { - case .newline, .eof: - context.index += 1 - default: - tokens.append(tok) - context.index += 1 - } - if case .newline = tok { break } - if case .eof = tok { break } - } - - // Trim trailing whitespace - while let last = tokens.last, case .text(let s, _) = last, s.trimmingCharacters(in: .whitespaces).isEmpty { - tokens.removeLast() - } - // Remove trailing '#' sequences - while let last = tokens.last, case .hash = last { - tokens.removeLast() - while let l = tokens.last, case .text(let s, _) = l, s.trimmingCharacters(in: .whitespaces).isEmpty { - tokens.removeLast() - } - } - while let last = tokens.last, case .text(let s, _) = last, s.trimmingCharacters(in: .whitespaces).isEmpty { - tokens.removeLast() - } - - // Remove spaces before hard breaks - var processed: [Token] = [] - for tok in tokens { - if case .hardBreak = tok { - while let l = processed.last, case .text(let s, _) = l, s.allSatisfy({ $0 == " " }) { - processed.removeLast() - } - } - processed.append(tok) - } - - let trimmedValue = processed.map { $0.text }.joined().trimmingCharacters(in: .whitespaces) - let children = MarkdownLanguage.parseInlineTokens(processed, input: context.input) - let node = MarkdownHeadingNode(value: trimmedValue, level: count) - children.forEach { node.addChild($0) } - context.currentNode.addChild(node) - } - } - - public class SetextHeadingBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard token is Token else { return false } - if context.index > 0 { - if let prev = context.tokens[context.index - 1] as? Token, case .newline = prev { - // ok - } else if context.index != 0 { - return false - } - } - - var idx = context.index - var sawText = false - while idx < context.tokens.count { - guard let t = context.tokens[idx] as? Token else { return false } - if case .newline = t { break } - if case .eof = t { return false } - sawText = true - idx += 1 - } - guard sawText else { return false } - guard idx < context.tokens.count, let nl = context.tokens[idx] as? Token, case .newline = nl else { return false } - idx += 1 - guard idx < context.tokens.count else { return false } - - var kind: Token? - var count = 0 - while idx < context.tokens.count { - guard let tok = context.tokens[idx] as? Token else { return false } - switch tok { - case .dash: - if kind == nil { kind = tok } - if case .dash = kind! { count += 1; idx += 1 } else { return false } - case .equal: - if kind == nil { kind = tok } - if case .equal = kind! { count += 1; idx += 1 } else { return false } - case .text(let s, _): - if s.trimmingCharacters(in: .whitespaces).isEmpty { idx += 1 } else { return false } - case .newline, .eof: - break - default: - return false - } - if idx < context.tokens.count, let next = context.tokens[idx] as? Token { - if case .newline = next { break } - if case .eof = next { break } - } - } - if count == 0 { return false } - if idx < context.tokens.count, let endTok = context.tokens[idx] as? Token { - switch endTok { - case .newline, .eof: - return true - default: - return false - } - } - return false - } - public func build(context: inout CodeContext) { - var text = "" - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - if case .newline = tok { - context.index += 1 - break - } else { - text += tok.text - context.index += 1 - } - } else { context.index += 1 } - } - var level: Int? - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - switch tok { - case .dash: - if level == nil { level = 2 } - context.index += 1 - case .equal: - if level == nil { level = 1 } - context.index += 1 - case .text(let s, _) where s.trimmingCharacters(in: .whitespaces).isEmpty: - context.index += 1 - case .newline: - context.index += 1 - let node = MarkdownHeadingNode(value: text.trimmingCharacters(in: .whitespaces), level: level ?? 1) - context.currentNode.addChild(node) - return - case .eof: - context.index += 1 - let node = MarkdownHeadingNode(value: text.trimmingCharacters(in: .whitespaces), level: level ?? 1) - context.currentNode.addChild(node) - return - default: - context.index += 1 - } - } else { context.index += 1 } - } - context.currentNode.addChild(MarkdownHeadingNode(value: text.trimmingCharacters(in: .whitespaces), level: level ?? 1)) - } - } - - // MARK: - List Parsing - - public class UnorderedListBuilder: CodeElementBuilder { - public init() {} - - private func lineIndent(before idx: Int, in context: CodeContext) -> Int? { - if idx == 0 { return 0 } - var i = idx - 1 - var count = 0 - while i >= 0 { - guard let tok = context.tokens[i] as? Token else { return nil } - switch tok { - case .newline: - return count - case .text(let s, _) where s.allSatisfy({ $0 == " " }): - count += s.count - i -= 1 - default: - return nil - } - } - return count - } - - private func isBullet(_ tok: Token) -> Bool { - switch tok { - case .dash, .star, .plus: return true - default: return false - } - } - - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token, isBullet(tok) else { return false } - guard context.index + 1 < context.tokens.count, - let next = context.tokens[context.index + 1] as? Token, - case .text(let s, _) = next, s.first?.isWhitespace == true else { - return false - } - if let ind = lineIndent(before: context.index, in: context) { return ind >= 0 } else { return false } - } - - public func build(context: inout CodeContext) { - func parseList(_ indent: Int, _ depth: Int) -> CodeNode { - let list = MarkdownUnorderedListNode(value: "", level: depth) - var isLoose = false - while context.index < context.tokens.count { - guard let bullet = context.tokens[context.index] as? Token, isBullet(bullet), lineIndent(before: context.index, in: context) == indent else { break } - let (node, loose) = parseItem(indent, depth) - if loose { isLoose = true } - list.addChild(node) - } - list.value = isLoose ? "loose" : "tight" - return list - } - - func parseItem(_ indent: Int, _ depth: Int) -> (CodeNode, Bool) { - var loose = false - // skip bullet and following whitespace - context.index += 1 - if context.index < context.tokens.count, - let t = context.tokens[context.index] as? Token, - case .text(let s, _) = t, s.first?.isWhitespace == true { - context.index += 1 - } - - let node = MarkdownListItemNode(value: "") - var text = "" - itemLoop: while context.index < context.tokens.count { - guard let tok = context.tokens[context.index] as? Token else { context.index += 1; continue } - switch tok { - case .newline: - context.index += 1 - // Check for blank line - if context.index < context.tokens.count, let nl = context.tokens[context.index] as? Token, case .newline = nl { - loose = true - context.index += 1 - } - let start = context.index - var spaces = 0 - if start < context.tokens.count, let sTok = context.tokens[start] as? Token, case .text(let s, _) = sTok, s.allSatisfy({ $0 == " " }) { - spaces = s.count - context.index += 1 - } - if context.index < context.tokens.count, let next = context.tokens[context.index] as? Token, isBullet(next), spaces > indent { - let sub = parseList(spaces, depth + 1) - node.addChild(sub) - if context.index < context.tokens.count, let nextTok = context.tokens[context.index] as? Token, isBullet(nextTok), (lineIndent(before: context.index, in: context) ?? 0) <= indent { - break itemLoop - } - } else if context.index < context.tokens.count, let next = context.tokens[context.index] as? Token, isBullet(next), spaces == indent { - context.index = start - break itemLoop - } else if spaces > indent { - text += "\n" - } else if spaces < indent { - context.index = start - break itemLoop - } else { - text += "\n" - } - case .eof: - context.index += 1 - break itemLoop - default: - text += tok.text - context.index += 1 - } - } - node.value = text.trimmingCharacters(in: .whitespaces) - return (node, loose) - } - - if let ind = lineIndent(before: context.index, in: context) { - let list = parseList(ind, 1) - context.currentNode.addChild(list) - } - } - } - - public class OrderedListBuilder: CodeElementBuilder { - public init() {} - - private func lineIndent(before idx: Int, in context: CodeContext) -> Int? { - if idx == 0 { return 0 } - var i = idx - 1 - var count = 0 - while i >= 0 { - guard let tok = context.tokens[i] as? Token else { return nil } - switch tok { - case .newline: - return count - case .text(let s, _) where s.allSatisfy({ $0 == " " }): - count += s.count - i -= 1 - default: - return nil - } - } - return count - } - - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token, case .number = tok else { return false } - guard context.index + 1 < context.tokens.count, - let dot = context.tokens[context.index + 1] as? Token, case .dot = dot else { return false } - if let _ = lineIndent(before: context.index, in: context) { return true } - return false - } - - public func build(context: inout CodeContext) { - func parseList(_ indent: Int, _ depth: Int) -> CodeNode { - let list = MarkdownOrderedListNode(value: "", level: depth) - var isLoose = false - while context.index < context.tokens.count { - guard context.index + 1 < context.tokens.count, - let num = context.tokens[context.index] as? Token, case .number = num, - let dot = context.tokens[context.index + 1] as? Token, case .dot = dot, - lineIndent(before: context.index, in: context) == indent else { break } - let (node, loose) = parseItem(indent, depth) - if loose { isLoose = true } - list.addChild(node) - } - list.value = isLoose ? "loose" : "tight" - return list - } - - func parseItem(_ indent: Int, _ depth: Int) -> (CodeNode, Bool) { - var loose = false - context.index += 2 - if context.index < context.tokens.count, - let t = context.tokens[context.index] as? Token, - case .text(let s, _) = t, s.first?.isWhitespace == true { - context.index += 1 - } - - let node = MarkdownOrderedListItemNode(value: "") - var text = "" - itemLoop: while context.index < context.tokens.count { - guard let tok = context.tokens[context.index] as? Token else { context.index += 1; continue } - switch tok { - case .newline: - context.index += 1 - if context.index < context.tokens.count, let nl = context.tokens[context.index] as? Token, case .newline = nl { - loose = true - context.index += 1 - } - let start = context.index - var spaces = 0 - if start < context.tokens.count, let sTok = context.tokens[start] as? Token, case .text(let s, _) = sTok, s.allSatisfy({ $0 == " " }) { - spaces = s.count - context.index += 1 - } - if context.index + 1 < context.tokens.count, - let nextNum = context.tokens[context.index] as? Token, case .number = nextNum, - let dot = context.tokens[context.index + 1] as? Token, case .dot = dot, - spaces > indent { - let sub = parseList(spaces, depth + 1) - node.addChild(sub) - if context.index + 1 < context.tokens.count, - let nextBullet = context.tokens[context.index] as? Token, case .number = nextBullet, - let ndot = context.tokens[context.index + 1] as? Token, case .dot = ndot, - (lineIndent(before: context.index, in: context) ?? 0) <= indent { - break itemLoop - } - } else if context.index + 1 < context.tokens.count, - let nextNum = context.tokens[context.index] as? Token, case .number = nextNum, - let dot = context.tokens[context.index + 1] as? Token, case .dot = dot, - spaces == indent { - context.index = start - break itemLoop - } else if spaces > indent { - text += "\n" - } else if spaces < indent { - context.index = start - break itemLoop - } else { - text += "\n" - } - case .eof: - context.index += 1 - break itemLoop - default: - text += tok.text - context.index += 1 - } - } - node.value = text.trimmingCharacters(in: .whitespaces) - return (node, loose) - } - - if let ind = lineIndent(before: context.index, in: context) { - let list = parseList(ind, 1) - context.currentNode.addChild(list) - } - } - } - - public class CodeBlockBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let first = token as? Token else { return false } - let fenceKind: String - switch first { - case .backtick: fenceKind = "`" - case .tilde: fenceKind = "~" - default: return false - } - var idx = context.index - var count = 0 - while idx < context.tokens.count, let t = context.tokens[idx] as? Token, t.kindDescription == fenceKind { - count += 1; idx += 1 - } - guard count >= 3 else { return false } - if context.index == 0 { return true } - if let prev = context.tokens[context.index - 1] as? Token, case .newline = prev { - return true - } - return false - } - public func build(context: inout CodeContext) { - guard let startTok = context.tokens[context.index] as? Token else { return } - let fenceKind = startTok.kindDescription - var fenceLength = 0 - while context.index < context.tokens.count, let t = context.tokens[context.index] as? Token, t.kindDescription == fenceKind { - fenceLength += 1 - context.index += 1 - } - // capture info string until end of line and trim whitespace - var info = "" - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - if case .newline = tok { - context.index += 1 - break - } else { - info += tok.text - context.index += 1 - } - } else { - context.index += 1 - } - } - info = info.trimmingCharacters(in: .whitespaces) - let lang = info.split(whereSeparator: { $0.isWhitespace }).first.map(String.init) - - let blockStart = context.index - var text = "" - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - // check for closing fence at start of line - if tok.kindDescription == fenceKind && (context.index == blockStart || (context.index > blockStart && (context.tokens[context.index - 1] as? Token)?.kindDescription == "newline")) { - var idx = context.index - var count = 0 - while idx < context.tokens.count, let t = context.tokens[idx] as? Token, t.kindDescription == fenceKind { - count += 1; idx += 1 - } - if count >= fenceLength { - context.index = idx - if context.index < context.tokens.count, let nl = context.tokens[context.index] as? Token, case .newline = nl { context.index += 1 } - context.currentNode.addChild(MarkdownCodeBlockNode(lang: lang, content: text)) - return - } - } - text += tok.text - context.index += 1 - } else { context.index += 1 } - } - context.currentNode.addChild(MarkdownCodeBlockNode(lang: lang, content: text)) - } - } - - public class BlockQuoteBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token else { return false } - if case .greaterThan = tok { - if context.index == 0 { return true } - if let prev = context.tokens[context.index - 1] as? Token, case .newline = prev { return true } - } - return false - } - public func build(context: inout CodeContext) { - context.index += 1 // skip '>' - var text = "" - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - switch tok { - case .newline: - context.index += 1 - let node = MarkdownBlockQuoteNode(value: text.trimmingCharacters(in: .whitespaces)) - context.currentNode.addChild(node) - return - case .eof: - let node = MarkdownBlockQuoteNode(value: text.trimmingCharacters(in: .whitespaces)) - context.currentNode.addChild(node) - context.index += 1 - return - default: - text += tok.text - context.index += 1 - } - } else { context.index += 1 } - } - } - } - - public class IndentedCodeBlockBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token else { return false } - if case .text(let s, _) = tok { - if (context.index == 0 || (context.tokens[context.index - 1] as? Token)?.kindDescription == "newline") && s.hasPrefix(" ") { - return true - } - } - return false - } - public func build(context: inout CodeContext) { - var text = "" - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - switch tok { - case .newline: - context.index += 1 - if context.index < context.tokens.count, let next = context.tokens[context.index] as? Token, case .text(let s, _) = next, s.hasPrefix(" ") { - text += "\n" + String(s.dropFirst(4)) - context.index += 1 - } else { - context.currentNode.addChild(MarkdownCodeBlockNode(lang: nil, content: text)) - return - } - case .text(let s, _): - text += String(s.dropFirst(4)) - context.index += 1 - default: - text += tok.text - context.index += 1 - } - } else { context.index += 1 } - } - context.currentNode.addChild(MarkdownCodeBlockNode(lang: nil, content: text)) - } - } - - public class ThematicBreakBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token else { return false } - switch tok { - case .dash, .star, .underscore: - if context.index == 0 || (context.index > 0 && (context.tokens[context.index - 1] as? Token) is Token && (context.tokens[context.index - 1] as? Token)?.kindDescription == "newline") { - var count = 0 - var idx = context.index - while idx < context.tokens.count, let t = context.tokens[idx] as? Token, t.kindDescription == tok.kindDescription { - count += 1; idx += 1 - } - if count >= 3 { - return true - } - } - default: - break - } - return false - } - public func build(context: inout CodeContext) { - if context.index < context.tokens.count, - let tok = context.tokens[context.index] as? Token { - let kind = tok.kindDescription - while context.index < context.tokens.count { - if let t = context.tokens[context.index] as? Token, - t.kindDescription == kind { - context.index += 1 - } else { - break - } - } - } - if context.index < context.tokens.count, - let nl = context.tokens[context.index] as? Token, - case .newline = nl { - context.index += 1 - } - context.currentNode.addChild(MarkdownThematicBreakNode(value: "")) - } - } - - public class ImageBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token else { return false } - if case .exclamation = tok, - context.index + 1 < context.tokens.count, - let next = context.tokens[context.index + 1] as? Token, - case .lbracket = next { return true } - return false - } - public func build(context: inout CodeContext) { - context.index += 2 // skip ![ - var alt = "" - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - if case .rbracket = tok { context.index += 1; break } - else { alt += tok.text; context.index += 1 } - } else { context.index += 1 } - } - var url = "" - if context.index < context.tokens.count, let lp = context.tokens[context.index] as? Token, case .lparen = lp { - context.index += 1 - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - if case .rparen = tok { context.index += 1; break } - else { url += tok.text; context.index += 1 } - } else { context.index += 1 } - } - } - context.currentNode.addChild(MarkdownImageNode(alt: alt, url: url)) - } - } - - public class HTMLBlockBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token else { return false } - if case .lessThan = tok, context.index == 0 { - let rest = String(context.input[tok.range.upperBound...]).lowercased() - return rest.hasPrefix("!doctype") || rest.hasPrefix("html") - } - return false - } - public func build(context: inout CodeContext) { - var text = "" - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { text += tok.text } - context.index += 1 - } - let closed = MarkdownLanguage.isHTMLClosed(text) - context.currentNode.addChild(MarkdownHtmlNode(value: text, closed: closed)) - } - } - - public class HTMLBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token else { return false } - return tok.kindDescription == "<" - } - public func build(context: inout CodeContext) { - context.index += 1 // skip < - var text = "" - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - if case .greaterThan = tok { context.index += 1; break } - else { text += tok.text; context.index += 1 } - } else { context.index += 1 } - } - let html = "<" + text + ">" - let closed = MarkdownLanguage.isHTMLClosed(html) - context.currentNode.addChild(MarkdownHtmlNode(value: text, closed: closed)) - } - } - - public class EntityBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token else { return false } - if case .ampersand = tok { return true } - return false - } - public func build(context: inout CodeContext) { - context.index += 1 - var text = "" - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - if case .semicolon = tok { context.index += 1; break } - else { text += tok.text; context.index += 1 } - } else { context.index += 1 } - } - let decoded = decode(text) - context.currentNode.addChild(MarkdownEntityNode(value: decoded)) - } - - private func decode(_ entity: String) -> String { - switch entity { - case "amp": return "&" - case "lt": return "<" - case "gt": return ">" - case "quot": return "\"" - case "apos": return "'" - default: - if entity.hasPrefix("#x") || entity.hasPrefix("#X") { - let hex = entity.dropFirst(2) - if let value = UInt32(hex, radix: 16), let scalar = UnicodeScalar(value) { - return String(Character(scalar)) - } - } else if entity.hasPrefix("#") { - let num = entity.dropFirst() - if let value = UInt32(num), let scalar = UnicodeScalar(value) { - return String(Character(scalar)) - } - } - return "&" + entity + ";" - } - } - } - public class StrikethroughBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard context.index + 1 < context.tokens.count else { return false } - guard let t1 = token as? Token, let t2 = context.tokens[context.index + 1] as? Token else { return false } - return t1.kindDescription == "~" && t2.kindDescription == "~" - } - public func build(context: inout CodeContext) { - context.index += 2 - var text = "" - while context.index + 1 < context.tokens.count { - if let t1 = context.tokens[context.index] as? Token, - let t2 = context.tokens[context.index + 1] as? Token, - t1.kindDescription == "~" && t2.kindDescription == "~" { - context.index += 2 - context.currentNode.addChild(MarkdownStrikethroughNode(value: text)) - return - } else if let tok = context.tokens[context.index] as? Token { - text += tok.text - context.index += 1 - } else { context.index += 1 } - } - context.currentNode.addChild(MarkdownStrikethroughNode(value: text)) - } - } - - public class AutoLinkBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token else { return false } - if case .lessThan = tok { return true } - return false - } - public func build(context: inout CodeContext) { - context.index += 1 - var text = "" - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - if case .greaterThan = tok { context.index += 1; break } - else { text += tok.text; context.index += 1 } - } else { context.index += 1 } - } - context.currentNode.addChild(MarkdownAutoLinkNode(url: text)) - } - } - - public class BareAutoLinkBuilder: CodeElementBuilder { - private static let regex: NSRegularExpression = { - let pattern = #"^((https?|ftp)://[^\s<>]+|www\.[^\s<>]+|[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,})"# - return try! NSRegularExpression(pattern: pattern, options: []) - }() - - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token else { return false } - let start = tok.range.lowerBound - let text = String(context.input[start...]) - let range = NSRange(location: 0, length: text.utf16.count) - if let m = Self.regex.firstMatch(in: text, range: range), m.range.location == 0 { - return true - } - return false - } - - public func build(context: inout CodeContext) { - guard let tok = context.tokens[context.index] as? Token else { return } - let start = tok.range.lowerBound - let text = String(context.input[start...]) - let range = NSRange(location: 0, length: text.utf16.count) - guard let m = Self.regex.firstMatch(in: text, range: range) else { return } - let endPos = context.input.index(start, offsetBy: m.range.length) - let url = String(context.input[start.. Bool { - guard let tok = token as? Token else { return false } - if case .pipe = tok { - if context.index == 0 { return true } - if let prev = context.tokens[context.index - 1] as? Token, case .newline = prev { return true } - } - return false - } - func parseRow(_ context: inout CodeContext) -> [String] { - var cells: [String] = [] - var cell = "" - context.index += 1 // skip leading pipe - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - switch tok { - case .pipe: - cells.append(cell.trimmingCharacters(in: .whitespaces)) - cell = "" - context.index += 1 - case .newline, .eof: - cells.append(cell.trimmingCharacters(in: .whitespaces)) - if let last = cells.last, last.isEmpty { cells.removeLast() } - context.index += 1 - return cells - default: - cell += tok.text - context.index += 1 - } - } else { - context.index += 1 - } - } - if !cell.isEmpty || !cells.isEmpty { - cells.append(cell.trimmingCharacters(in: .whitespaces)) - } - return cells - } - - func parseDelimiter(_ context: inout CodeContext) -> [String]? { - guard context.index < context.tokens.count, - let first = context.tokens[context.index] as? Token, - case .pipe = first else { return nil } - var snapshot = context.snapshot() - let cells = parseRow(&context) - for cell in cells { - var trimmed = cell.trimmingCharacters(in: .whitespaces) - if trimmed.hasPrefix(":") { trimmed.removeFirst() } - if trimmed.hasSuffix(":") { trimmed.removeLast() } - if trimmed.count < 3 { context.restore(snapshot); return nil } - if !trimmed.allSatisfy({ $0 == "-" }) { - context.restore(snapshot); return nil - } - } - return cells - } - - public func build(context: inout CodeContext) { - var ctx = context - let header = parseRow(&ctx) - let startIndex = ctx.index - if let _ = parseDelimiter(&ctx) { - var rows: [[String]] = [] - while ctx.index < ctx.tokens.count, - let tok = ctx.tokens[ctx.index] as? Token, - case .pipe = tok { - rows.append(parseRow(&ctx)) - } - - let table = MarkdownTableNode() - let headerNode = MarkdownTableHeaderNode() - for cell in header { - let cellNode = MarkdownTableCellNode() - cellNode.addChild(MarkdownTextNode(value: cell)) - headerNode.addChild(cellNode) - } - table.addChild(headerNode) - - for row in rows { - let rowNode = MarkdownTableRowNode() - for cell in row { - let cellNode = MarkdownTableCellNode() - cellNode.addChild(MarkdownTextNode(value: cell)) - rowNode.addChild(cellNode) - } - table.addChild(rowNode) - } - - context = ctx - context.currentNode.addChild(table) - } else { - context.index = startIndex - context.currentNode.addChild(MarkdownTableNode(value: header.joined(separator: "|"))) - } - } - } - - public class FootnoteBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let lb = token as? Token, case .lbracket = lb else { return false } - guard context.index + 2 < context.tokens.count else { return false } - guard let first = context.tokens[context.index + 1] as? Token else { return false } - if case .text(let s, _) = first, s.starts(with: "^") { - var idx = context.index + 2 - while idx < context.tokens.count { - if let t = context.tokens[idx] as? Token { - if case .rbracket = t { return true } - if case .text = t { - idx += 1; continue - } - if case .number = t { - idx += 1; continue - } - } - break - } - } - return false - } - public func build(context: inout CodeContext) { - context.index += 1 // skip [ - var id = "" - while context.index < context.tokens.count { - guard let tok = context.tokens[context.index] as? Token else { context.index += 1; continue } - if case .rbracket = tok { break } - id += tok.text - context.index += 1 - } - if id.hasPrefix("^") { id.removeFirst() } - if context.index < context.tokens.count { context.index += 1 } // skip ] - - if context.index < context.tokens.count, - let colon = context.tokens[context.index] as? Token, - case .text(let s, _) = colon, - s.trimmingCharacters(in: .whitespaces).hasPrefix(":") { - var text = s - context.index += 1 - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - if case .newline = tok { context.index += 1; break } - else { text += tok.text; context.index += 1 } - } else { context.index += 1 } - } - if text.hasPrefix(":") { text.removeFirst() } - let trimmed = text.trimmingCharacters(in: .whitespaces) - context.currentNode.addChild(MarkdownFootnoteDefinitionNode(identifier: id, text: trimmed)) - } else { - context.currentNode.addChild(MarkdownFootnoteReferenceNode(identifier: id)) - } - } - } - - public class LinkReferenceDefinitionBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard context.index + 3 < context.tokens.count else { return false } - guard let lb = token as? Token, - let txt = context.tokens[context.index + 1] as? Token, - let rb = context.tokens[context.index + 2] as? Token, - let colon = context.tokens[context.index + 3] as? Token else { return false } - if case .lbracket = lb, - case .text = txt, - case .rbracket = rb, - case .text(let s, _) = colon, - s.trimmingCharacters(in: .whitespaces).hasPrefix(":") { - return true - } - return false - } - public func build(context: inout CodeContext) { - context.index += 1 - var id = "" - if context.index < context.tokens.count, let idTok = context.tokens[context.index] as? Token, case .text(let s, _) = idTok { - id = s - context.index += 1 - } - if context.index < context.tokens.count { context.index += 1 } // skip ] - var text = "" - if context.index < context.tokens.count, let colon = context.tokens[context.index] as? Token, case .text(let s, _) = colon { - text = s - context.index += 1 - } - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - if case .newline = tok { context.index += 1; break } - else { text += tok.text; context.index += 1 } - } else { context.index += 1 } - } - var url = text.trimmingCharacters(in: .whitespaces) - if url.hasPrefix(":") { url.removeFirst() } - url = url.trimmingCharacters(in: .whitespaces) - let trimmedID = id.trimmingCharacters(in: .whitespaces) - context.linkReferences[trimmedID.lowercased()] = url - context.currentNode.addChild(MarkdownLinkReferenceDefinitionNode(identifier: trimmedID, url: url)) - } - } // Helper to parse inline content supporting nested emphasis/strong static func parseInline(context: inout CodeContext, closing: Token, count: Int) -> ([CodeNode], Bool) { @@ -1401,191 +135,6 @@ public struct MarkdownLanguage: CodeLanguage { return stack.isEmpty } - public class StrongBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard context.index + 1 < context.tokens.count else { return false } - guard let t1 = token as? Token, - let t2 = context.tokens[context.index + 1] as? Token else { return false } - switch (t1, t2) { - case (.star, .star), (.underscore, .underscore): - return true - default: - return false - } - } - public func build(context: inout CodeContext) { - let snap = context.snapshot() - guard let open = context.tokens[context.index] as? Token else { return } - context.index += 2 - let (children, ok) = MarkdownLanguage.parseInline(context: &context, closing: open, count: 2) - if ok { - let node = MarkdownStrongNode(value: "") - children.forEach { node.addChild($0) } - context.currentNode.addChild(node) - } else { - context.restore(snap) - context.currentNode.addChild(MarkdownTextNode(value: open.text + open.text)) - context.index += 2 - } - } - } - - public class EmphasisBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token else { return false } - if case .star = tok { return true } - if case .underscore = tok { return true } - return false - } - public func build(context: inout CodeContext) { - let snap = context.snapshot() - guard let open = context.tokens[context.index] as? Token else { return } - context.index += 1 - let (children, ok) = MarkdownLanguage.parseInline(context: &context, closing: open, count: 1) - if ok { - let node = MarkdownEmphasisNode(value: "") - children.forEach { node.addChild($0) } - context.currentNode.addChild(node) - } else { - context.restore(snap) - context.currentNode.addChild(MarkdownTextNode(value: open.text)) - context.index += 1 - } - } - } - - public class InlineCodeBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token else { return false } - if case .backtick = tok { return true } - return false - } - public func build(context: inout CodeContext) { - context.index += 1 - var text = "" - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token, case .backtick = tok { - context.index += 1 - let node = MarkdownInlineCodeNode(value: text) - context.currentNode.addChild(node) - return - } else if let tok = context.tokens[context.index] as? Token { - text += tok.text - context.index += 1 - } else { context.index += 1 } - } - let node = MarkdownInlineCodeNode(value: text) - context.currentNode.addChild(node) - } - } - - public class LinkBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - guard let tok = token as? Token else { return false } - if case .lbracket = tok { return true } - return false - } - public func build(context: inout CodeContext) { - context.index += 1 - var textTokens: [Token] = [] - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - if case .rbracket = tok { - context.index += 1 - break - } else { - textTokens.append(tok) - context.index += 1 - } - } else { context.index += 1 } - } - let textNodes = MarkdownLanguage.parseInlineTokens(textTokens, input: context.input) - var url = "" - if context.index < context.tokens.count, let lparen = context.tokens[context.index] as? Token, case .lparen = lparen { - context.index += 1 - while context.index < context.tokens.count { - if let tok = context.tokens[context.index] as? Token { - if case .rparen = tok { - context.index += 1 - break - } else { - url += tok.text - context.index += 1 - } - } else { context.index += 1 } - } - } else if context.index + 2 < context.tokens.count, - let lb = context.tokens[context.index] as? Token, case .lbracket = lb, - let idTok = context.tokens[context.index + 1] as? Token, - let rb = context.tokens[context.index + 2] as? Token, case .rbracket = rb, - case .text(let id, _) = idTok { - context.index += 3 - let key = id.trimmingCharacters(in: .whitespaces).lowercased() - if let ref = context.linkReferences[key] { - url = ref - } - } - let node = MarkdownLinkNode(text: textNodes, url: url) - context.currentNode.addChild(node) - } - } - - public class ParagraphBuilder: CodeElementBuilder { - public init() {} - public func accept(context: CodeContext, token: any CodeToken) -> Bool { - if token is Token { return true } else { return false } - } - public func build(context: inout CodeContext) { - var tokens: [Token] = [] - var ended = false - while context.index < context.tokens.count { - guard let tok = context.tokens[context.index] as? Token else { context.index += 1; continue } - switch tok { - case .text, .star, .underscore, .backtick: - tokens.append(tok) - context.index += 1 - case .hardBreak: - while let last = tokens.last, case .text(let s, _) = last, s.allSatisfy({ $0 == " " }) { - tokens.removeLast() - } - tokens.append(tok) - context.index += 1 - case .newline: - context.index += 1 - ended = true - case .dash, .hash, .plus, .lbracket, - .greaterThan, .exclamation, .tilde, .equal, .lessThan, .ampersand, .semicolon, .pipe: - ended = true - case .number: - if context.index + 1 < context.tokens.count, - let dot = context.tokens[context.index + 1] as? Token, - case .dot = dot { - ended = true - } else { - tokens.append(tok) - context.index += 1 - } - case .eof: - context.index += 1 - ended = true - case .dot, .rbracket, .lparen, .rparen: - tokens.append(tok) - context.index += 1 - } - if ended { break } - } - - let value = tokens.map { $0.text }.joined() - let children = MarkdownLanguage.parseInlineTokens(tokens, input: context.input) - let node = MarkdownParagraphNode(value: value) - children.forEach { node.addChild($0) } - context.currentNode.addChild(node) - } - } public var tokenizer: CodeTokenizer { Tokenizer() } public var builders: [CodeElementBuilder] {