Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions Sources/SwiftParser/Core/CodeParser.swift
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ public final class CodeParser<Node, Token> where Node: CodeNodeElement, Token: C
var context = CodeContext(current: root, tokens: tokens, state: language.state(of: normalized))

while context.consuming < context.tokens.count {
// Stop at EOF without recording an error
if let token = context.tokens[context.consuming] as? MarkdownToken,
token.element == .eof {
break
}

var matched = false
for builder in language.builders {
if builder.build(from: &context) {
Expand All @@ -22,13 +28,11 @@ public final class CodeParser<Node, Token> where Node: CodeNodeElement, Token: C
}

if !matched {
// If no consumer matched, we have an unrecognized token
// If no builder matched, record an error and skip the token
let token = context.tokens[context.consuming]
let error = CodeError("Unrecognized token: \(token.element)", range: token.range)
context.errors.append(error)
context.consuming += 1 // Skip the unrecognized token
} else {
break // Exit the loop if a consumer successfully processed tokens
context.consuming += 1
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import Foundation

/// Node builder that turns a line starting with `>` into a `BlockquoteNode`.
public class MarkdownBlockquoteBuilder: CodeNodeBuilder {
    private typealias Context = CodeContext<MarkdownNodeElement, MarkdownTokenElement>

    public init() {}

    /// Attempts to build a blockquote at the current token.
    ///
    /// - Parameter context: Parsing context. On success `context.consuming` is moved past
    ///   the `>` marker, one optional space, the inline content and one trailing newline
    ///   token (if present). On failure the context is left untouched.
    /// - Returns: `true` when a `BlockquoteNode` was appended to `context.current`.
    public func build(from context: inout CodeContext<MarkdownNodeElement, MarkdownTokenElement>) -> Bool {
        let atMarker = hasToken(.gt, at: context.consuming, in: context)
        if !atMarker || !isStartOfLine(context) {
            return false
        }

        // Step over the `>` marker and the single optional space that may follow it.
        context.consuming += 1
        skipToken(.space, in: &context)

        // The inline parser stops at a newline or EOF token by default.
        let inlines = MarkdownInlineParser.parseInline(&context)
        let quote = BlockquoteNode()
        for inline in inlines {
            quote.append(inline)
        }
        context.current.append(quote)

        // Swallow the line terminator so the next builder starts on a fresh line.
        skipToken(.newline, in: &context)
        return true
    }

    /// Returns `true` when the current token is the first token or directly follows a newline token.
    private func isStartOfLine(_ context: CodeContext<MarkdownNodeElement, MarkdownTokenElement>) -> Bool {
        guard context.consuming != 0 else { return true }
        return hasToken(.newline, at: context.consuming - 1, in: context)
    }

    /// Returns `true` when `index` is in range and holds a `MarkdownToken` with the given element.
    private func hasToken(_ element: MarkdownTokenElement, at index: Int, in context: Context) -> Bool {
        guard index < context.tokens.count,
              let candidate = context.tokens[index] as? MarkdownToken
        else { return false }
        return candidate.element == element
    }

    /// Advances past the current token when it has the given element; otherwise does nothing.
    private func skipToken(_ element: MarkdownTokenElement, in context: inout Context) {
        if hasToken(element, at: context.consuming, in: context) {
            context.consuming += 1
        }
    }
}
49 changes: 49 additions & 0 deletions Sources/SwiftParser/Markdown/Builders/MarkdownHeadingBuilder.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import Foundation

/// Builds `HeaderNode`s from ATX-style headings: one to six `#` tokens at the
/// start of a line, followed by a space token and inline content.
public class MarkdownHeadingBuilder: CodeNodeBuilder {
    public init() {}

    /// Attempts to build a heading at the current token.
    ///
    /// - Parameter context: Parsing context. On success `context.consuming` is moved past
    ///   the heading's inline content and one trailing newline token (if present).
    ///   On failure the context is left untouched.
    /// - Returns: `true` when a `HeaderNode` was appended to `context.current`.
    public func build(from context: inout CodeContext<MarkdownNodeElement, MarkdownTokenElement>) -> Bool {
        guard context.consuming < context.tokens.count,
              let token = context.tokens[context.consuming] as? MarkdownToken,
              token.element == .hash,
              isStartOfLine(context)
        else { return false }

        // Count the run of `#` tokens on a scratch index so nothing is consumed
        // until the whole heading prefix has been recognised.
        var level = 0
        var idx = context.consuming
        while idx < context.tokens.count,
              let t = context.tokens[idx] as? MarkdownToken,
              t.element == .hash,
              level < 6 {
            level += 1
            idx += 1
        }

        // A space must follow the hashes. A seventh `#` also ends up here and
        // rejects the heading, because the loop above stops counting at six.
        guard idx < context.tokens.count,
              let space = context.tokens[idx] as? MarkdownToken,
              space.element == .space else { return false }
        idx += 1

        context.consuming = idx
        // Parse inline content until a newline or EOF.
        // `children` is never mutated, so it is a constant.
        let children = MarkdownInlineParser.parseInline(&context)
        let node = HeaderNode(level: level)
        for child in children { node.append(child) }
        context.current.append(node)

        // Swallow the line terminator so the next builder starts on a fresh line.
        if context.consuming < context.tokens.count,
           let nl = context.tokens[context.consuming] as? MarkdownToken,
           nl.element == .newline {
            context.consuming += 1
        }
        return true
    }

    /// Returns `true` when the current token is the first token or directly follows a newline token.
    private func isStartOfLine(_ context: CodeContext<MarkdownNodeElement, MarkdownTokenElement>) -> Bool {
        if context.consuming == 0 { return true }
        if let prev = context.tokens[context.consuming - 1] as? MarkdownToken {
            return prev.element == .newline
        }
        return false
    }
}
242 changes: 242 additions & 0 deletions Sources/SwiftParser/Markdown/Builders/MarkdownInlineParser.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
import Foundation

struct MarkdownInlineParser {
/// Parses inline Markdown tokens into nodes, starting at `context.consuming`.
///
/// Scanning ends when the tokens run out, when a token is not a `MarkdownToken`,
/// or when the current token's element is contained in `stopAt`. The token that
/// stops the scan is not consumed.
///
/// - Parameters:
///   - context: Parsing context; `consuming` is advanced past every token handled here.
///   - stopAt: Token elements that terminate the scan. Defaults to newline and EOF.
/// - Returns: The top-level inline nodes that were produced.
static func parseInline(
    _ context: inout CodeContext<MarkdownNodeElement, MarkdownTokenElement>,
    stopAt: Set<MarkdownTokenElement> = [.newline, .eof]
) -> [MarkdownNodeBase] {
    var output: [MarkdownNodeBase] = []
    var openDelimiters: [Delimiter] = []

    while context.consuming < context.tokens.count {
        guard let current = context.tokens[context.consuming] as? MarkdownToken,
              !stopAt.contains(current.element)
        else { break }

        switch current.element {
        case .asterisk, .underscore:
            // Swallow the whole run of identical markers, then let the delimiter
            // logic decide whether it opens or closes emphasis.
            let marker = current.element
            var runLength = 0
            repeat {
                runLength += 1
                context.consuming += 1
            } while context.consuming < context.tokens.count
                && (context.tokens[context.consuming] as? MarkdownToken)?.element == marker
            handleDelimiter(marker: marker, count: runLength, nodes: &output, stack: &openDelimiters)

        case .inlineCode:
            let code = trimBackticks(current.text)
            output.append(InlineCodeNode(code: code))
            context.consuming += 1

        case .formula:
            let expression = trimFormula(current.text)
            output.append(FormulaNode(expression: expression))
            context.consuming += 1

        case .htmlTag, .htmlBlock, .htmlUnclosedBlock, .htmlEntity:
            output.append(HTMLNode(content: current.text))
            context.consuming += 1

        case .exclamation:
            // `!` only matters as the start of an image; otherwise it is literal text.
            if let image = parseImage(&context) {
                output.append(image)
            } else {
                output.append(TextNode(content: current.text))
                context.consuming += 1
            }

        case .leftBracket:
            // `[` may start a link, a reference or a footnote; otherwise it is literal text.
            if let link = parseLinkOrFootnote(&context) {
                output.append(link)
            } else {
                output.append(TextNode(content: current.text))
                context.consuming += 1
            }

        case .autolink, .url:
            let target = trimAutolink(current.text)
            output.append(LinkNode(url: target, title: target))
            context.consuming += 1

        default:
            // Plain text: extend the trailing text node, unless that node is the
            // placeholder of a pending delimiter run, which must stay intact.
            let tailIndex = output.count - 1
            if let tail = output.last as? TextNode,
               !openDelimiters.contains(where: { $0.index == tailIndex }) {
                tail.content += current.text
            } else {
                output.append(TextNode(content: current.text))
            }
            context.consuming += 1
        }
    }

    return output
}


/// Bookkeeping for a run of emphasis markers that was emitted as text and may
/// still be closed by a later run (see `handleDelimiter`).
private struct Delimiter {
/// Token element of the run; openers and closers are matched on this value.
var marker: MarkdownTokenElement
/// Number of unmatched marker tokens in the run at the time it was pushed.
var count: Int
/// Position in the `nodes` array of the `TextNode` placeholder for this run.
var index: Int
}

/// Processes a run of `count` identical emphasis markers.
///
/// The run first tries to close previously pushed runs of the same marker,
/// most recent first. Each match wraps the nodes that follow the opener in a
/// `StrongNode` or an `EmphasisNode`. Markers left over afterwards are emitted
/// as literal text and pushed on `stack` as a potential opener.
///
/// - Parameters:
///   - marker: Token element of the run.
///   - count: Number of consecutive marker tokens in the run.
///   - nodes: Inline nodes built so far; rewritten in place.
///   - stack: Pending delimiter runs; matched entries are removed, unmatched runs appended.
private static func handleDelimiter(
marker: MarkdownTokenElement,
count: Int,
nodes: inout [MarkdownNodeBase],
stack: inout [Delimiter]
) {
var remaining = count

// Keep matching against the most recently pushed opener with the same marker.
while remaining > 0, let openIdx = stack.lastIndex(where: { $0.marker == marker }) {
let open = stack.remove(at: openIdx)
// Only as many markers as both runs share take part in this match.
// NOTE(review): when open.count > closeCount the surplus opener markers are
// not re-emitted anywhere — confirm this is intended.
let closeCount = min(open.count, remaining)

// Everything after the opener's placeholder becomes the content of the new
// node; the placeholder text node itself is removed along with it.
let start = open.index + 1
let removedCount = nodes.count - open.index
let content = Array(nodes[start..<nodes.count])
nodes.removeSubrange(open.index..<nodes.count)
// Shift the recorded positions of the remaining stack entries at or after the opener.
// NOTE(review): such entries pointed at placeholders inside the removed range,
// which now live inside the new node rather than in `nodes` — verify the
// adjusted index is what later matching expects.
for i in 0..<stack.count {
if stack[i].index >= open.index {
stack[i].index -= removedCount - 1
}
}

// Two or more matched markers produce strong emphasis, a single one plain emphasis.
let node: MarkdownNodeBase = (closeCount >= 2) ? StrongNode(content: "") : EmphasisNode(content: "")
for child in content { node.append(child) }
nodes.append(node)

remaining -= closeCount
}

// Unmatched markers stay in the output as text and are remembered as a possible opener.
if remaining > 0 {
let text = String(repeating: marker.rawValue, count: remaining)
nodes.append(TextNode(content: text))
stack.append(Delimiter(marker: marker, count: remaining, index: nodes.count - 1))
}
}

/// Parses one of three bracket constructs starting at the current `[` token:
/// a footnote reference `[^id]`, an inline link `[text](url)` or a reference
/// link `[text][id]`.
///
/// - Parameter context: Parsing context; `context.consuming` must point at the `[` token.
///   On success it is advanced past the whole construct. On failure it is reset to
///   the position it had on entry.
/// - Returns: A `FootnoteNode`, `LinkNode` or `ReferenceNode`, or `nil` when the tokens
///   do not form any of the three constructs.
private static func parseLinkOrFootnote(_ context: inout CodeContext<MarkdownNodeElement, MarkdownTokenElement>) -> MarkdownNodeBase? {
    typealias Context = CodeContext<MarkdownNodeElement, MarkdownTokenElement>

    /// The token under the cursor, or `nil` at the end of input or for a foreign token type.
    func currentToken(in context: inout Context) -> MarkdownToken? {
        guard context.consuming < context.tokens.count else { return nil }
        return context.tokens[context.consuming] as? MarkdownToken
    }

    /// Concatenates raw token text up to `closer` and consumes the closer.
    /// Returns `nil` (cursor left wherever the scan stopped) when no closer is found.
    func rawText(upTo closer: MarkdownTokenElement, in context: inout Context) -> (text: String, closer: MarkdownToken)? {
        var collected = ""
        while let token = currentToken(in: &context) {
            if token.element == closer {
                context.consuming += 1
                return (collected, token)
            }
            collected += token.text
            context.consuming += 1
        }
        return nil
    }

    let entry = context.consuming
    context.consuming += 1  // step over `[`

    // Footnote reference [^id]
    if let caret = currentToken(in: &context), caret.element == .caret {
        context.consuming += 1
        guard let footnote = rawText(upTo: .rightBracket, in: &context) else {
            context.consuming = entry
            return nil
        }
        return FootnoteNode(identifier: footnote.text, content: "", referenceText: nil, range: footnote.closer.range)
    }

    // Link text, parsed as inline content up to the closing `]`.
    let label = parseInline(&context, stopAt: [.rightBracket])
    guard let closing = currentToken(in: &context), closing.element == .rightBracket else {
        context.consuming = entry
        return nil
    }
    context.consuming += 1

    // What follows the label decides between an inline link and a reference link.
    guard let next = currentToken(in: &context) else {
        context.consuming = entry
        return nil
    }

    switch next.element {
    case .leftParen:
        // Inline link [text](url)
        context.consuming += 1
        guard let destination = rawText(upTo: .rightParen, in: &context) else {
            context.consuming = entry
            return nil
        }
        let link = LinkNode(url: destination.text, title: "")
        for child in label { link.append(child) }
        return link

    case .leftBracket:
        // Reference link [text][id]
        context.consuming += 1
        guard let identifier = rawText(upTo: .rightBracket, in: &context) else {
            context.consuming = entry
            return nil
        }
        let reference = ReferenceNode(identifier: identifier.text, url: "", title: "")
        for child in label { reference.append(child) }
        return reference

    default:
        context.consuming = entry
        return nil
    }
}

/// Parses an inline image of the form `![alt](url)` starting at the current `!` token.
///
/// - Parameter context: Parsing context; `context.consuming` must point at the `!` token.
///   On success it is advanced past the closing `)`. On failure it is restored to the
///   position it had on entry, so the caller can emit `!` as plain text and continue
///   with the very next token.
/// - Returns: An `ImageNode`, or `nil` when the tokens do not form a complete image.
private static func parseImage(_ context: inout CodeContext<MarkdownNodeElement, MarkdownTokenElement>) -> MarkdownNodeBase? {
    // Remember the entry position. The alt-text and URL scans consume a variable
    // number of tokens, so backtracking by a fixed offset would leave the cursor
    // in the middle of the stream and lose or re-parse tokens.
    let start = context.consuming

    guard start + 1 < context.tokens.count,
          let lb = context.tokens[start + 1] as? MarkdownToken,
          lb.element == .leftBracket else { return nil }
    context.consuming = start + 2  // step over `!` and `[`

    // Alt text: inline content up to the closing `]`.
    let altNodes = parseInline(&context, stopAt: [.rightBracket])
    guard context.consuming < context.tokens.count,
          let rb = context.tokens[context.consuming] as? MarkdownToken,
          rb.element == .rightBracket else { context.consuming = start; return nil }
    context.consuming += 1

    guard context.consuming < context.tokens.count,
          let lp = context.tokens[context.consuming] as? MarkdownToken,
          lp.element == .leftParen else { context.consuming = start; return nil }
    context.consuming += 1

    // URL: raw token text up to the closing `)`.
    var url = ""
    while context.consuming < context.tokens.count,
          let t = context.tokens[context.consuming] as? MarkdownToken,
          t.element != .rightParen {
        url += t.text
        context.consuming += 1
    }
    guard context.consuming < context.tokens.count,
          let rp = context.tokens[context.consuming] as? MarkdownToken,
          rp.element == .rightParen else { context.consuming = start; return nil }
    context.consuming += 1

    // Only plain text nodes contribute to the alt string; other inline nodes are skipped.
    let alt = altNodes.compactMap { ($0 as? TextNode)?.content }.joined()
    return ImageNode(url: url, alt: alt)
}

/// Strips every leading and trailing backtick from `text`; backticks in the middle are kept.
///
/// - Parameter text: Raw inline-code token text, including its backtick fences.
/// - Returns: The text between the fences, or an empty string when nothing else remains.
private static func trimBackticks(_ text: String) -> String {
    let fence: Character = "`"
    let withoutLeading = text.drop(while: { $0 == fence })
    // If no other character remains, the input consisted of backticks only.
    guard let lastKept = withoutLeading.lastIndex(where: { $0 != fence }) else {
        return ""
    }
    return String(withoutLeading[...lastKept])
}

/// Removes at most one leading and one trailing `$` from `text`.
///
/// - Parameter text: Raw formula token text, including its `$` delimiters.
/// - Returns: The text with one delimiter stripped from each end where present.
private static func trimFormula(_ text: String) -> String {
    let withoutOpening = text.hasPrefix("$") ? text.dropFirst() : Substring(text)
    let withoutClosing = withoutOpening.hasSuffix("$") ? withoutOpening.dropLast() : withoutOpening
    return String(withoutClosing)
}

/// Unwraps an autolink written as `<target>`; any other text is returned unchanged.
///
/// - Parameter text: Raw autolink or URL token text.
/// - Returns: The text without its surrounding angle brackets, when both are present.
private static func trimAutolink(_ text: String) -> String {
    guard text.hasPrefix("<"), text.hasSuffix(">") else {
        return text
    }
    // Both brackets are present, so the text has at least two characters and
    // the inner bounds below are valid (and equal for "<>").
    let innerStart = text.index(after: text.startIndex)
    let innerEnd = text.index(before: text.endIndex)
    return String(text[innerStart..<innerEnd])
}
}
14 changes: 14 additions & 0 deletions Sources/SwiftParser/Markdown/Builders/MarkdownNewlineBuilder.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import Foundation

/// Node builder that consumes a newline token and moves the context one level up the tree.
public class MarkdownNewlineBuilder: CodeNodeBuilder {
    public init() {}

    /// Consumes the current token when it is a newline.
    ///
    /// - Parameter context: Parsing context. On success `context.consuming` is advanced by
    ///   one and `context.current` becomes its parent, or stays as is when it has none.
    /// - Returns: `true` when a newline token was consumed, `false` otherwise.
    public func build(from context: inout CodeContext<MarkdownNodeElement, MarkdownTokenElement>) -> Bool {
        let position = context.consuming
        if position >= context.tokens.count {
            return false
        }
        guard let candidate = context.tokens[position] as? MarkdownToken,
              candidate.element == .newline
        else {
            return false
        }

        context.consuming = position + 1
        // A line break ends the current node, so continue in its parent (if any).
        let enclosing = context.current.parent ?? context.current
        context.current = enclosing
        return true
    }
}
Loading