Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions MARKDOWN_PARSER.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ This document provides an overview of the Markdown parser built on top of the Sw
- ✅ Fenced code blocks (```code```)
- ✅ Block quotes (> quote) with multi-line merging
- ✅ Lists (ordered and unordered) with automatic numbering
- ✅ Task lists (- [ ] unchecked, - [x] checked) – GFM extension
- ✅ Links ([text](URL) and reference style)
- ✅ Images (![alt](URL))
- ✅ Autolinks (<URL>)
- ✅ Horizontal rules (---)
- ✅ HTML inline elements
- ✅ HTML block elements
- ✅ Line break handling

### GitHub Flavored Markdown (GFM) Extensions
Expand All @@ -28,6 +28,12 @@ This document provides an overview of the Markdown parser built on top of the Sw
### Academic Extensions
- ✅ **Footnotes**: Definition and reference support ([^1]: footnote, [^1])
- ✅ **Citations**: Academic citation support ([@author2023]: reference, [@author2023])
- ✅ **Math formulas**: inline ($math$) and block ($$math$$)

### Other Extensions
- ✅ **Definition lists**: term/definition pairs
- ✅ **Admonitions**: note/warning/info blocks using `:::`
- ✅ **Custom containers**: generic container syntax (`:::`)

### Advanced List Features
- ✅ **Unordered lists**: supports `-`, `*`, `+` markers
Expand Down Expand Up @@ -657,11 +663,10 @@ When reporting bugs, include:
## Future Roadmap

### Planned Features
- [ ] **Math Support**: LaTeX-style math expressions (`$inline$`, `$$block$$`)
- [ ] **Definition Lists**: Support for definition list syntax
- [ ] **Admonitions**: Support for warning/info/note blocks
- [x] **Definition Lists**: Support for definition list syntax
- [x] **Admonitions**: Support for warning/info/note blocks
- [ ] **Mermaid Diagrams**: Inline diagram support
- [ ] **Custom Containers**: Generic container syntax (:::)
- [x] **Custom Containers**: Generic container syntax (:::)
- [ ] **Syntax Highlighting**: Code block syntax highlighting
- [ ] **Export Formats**: HTML, PDF, and other output formats

Expand Down Expand Up @@ -690,4 +695,4 @@ This project is licensed under the MIT License - see the LICENSE file for detail

---

*Last updated: 2025-07-18*
*Last updated: 2025-07-20*
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import Foundation

/// Builds `:::`-fenced containers.
///
/// A container opens with three consecutive colon tokens at the start of a
/// line, followed by an optional name that runs to the end of that line. It
/// closes at the next line that starts with three colon tokens, or at the end
/// of the token stream when no closing fence exists.
///
/// When the lowercased name is `note`, `warning` or `info`, an
/// `AdmonitionNode` is produced; any other name produces a
/// `CustomContainerNode` carrying the whitespace-trimmed name as written.
public class MarkdownAdmonitionBuilder: CodeNodeBuilder {
    public init() {}

    /// Tries to build a container starting at `context.consuming`.
    ///
    /// - Parameter context: Parsing context; on success `consuming` is moved
    ///   past the container and the new node is appended to `current`.
    /// - Returns: `true` when a container was built, `false` when the tokens
    ///   at the current position do not open one (the context is untouched).
    public func build(from context: inout CodeContext<MarkdownNodeElement, MarkdownTokenElement>) -> Bool {
        let tokens = context.tokens
        guard isFence(at: context.consuming, in: tokens),
              isStartOfLine(context) else { return false }

        // Everything between the opening fence and the end of the line is the name.
        var cursor = context.consuming + 3
        var rawName = ""
        while let token = markdownToken(at: cursor, in: tokens), token.element != .newline {
            rawName += token.text
            cursor += 1
        }
        let name = rawName.trimmingCharacters(in: .whitespaces)

        // The opening line must be terminated by a newline token.
        guard let lineEnd = markdownToken(at: cursor, in: tokens),
              lineEnd.element == .newline else { return false }
        cursor += 1

        // Gather body tokens up to (not including) the closing fence line.
        var bodyTokens: [any CodeToken<MarkdownTokenElement>] = []
        while cursor < tokens.count {
            if isStartOfLine(index: cursor, tokens: tokens), isFence(at: cursor, in: tokens) {
                // Skip the closing fence, the remainder of its line and its newline.
                cursor += 3
                while let token = markdownToken(at: cursor, in: tokens), token.element != .newline {
                    cursor += 1
                }
                if let token = markdownToken(at: cursor, in: tokens), token.element == .newline {
                    cursor += 1
                }
                break
            }
            bodyTokens.append(tokens[cursor])
            cursor += 1
        }
        context.consuming = cursor

        // The body is parsed with the inline parser only.
        var subContext = CodeContext(current: DocumentNode(), tokens: bodyTokens)
        let children = MarkdownInlineParser.parseInline(&subContext)

        let kind = name.lowercased()
        let node: MarkdownNodeBase
        switch kind {
        case "note", "warning", "info":
            node = AdmonitionNode(kind: kind)
        default:
            node = CustomContainerNode(name: name)
        }
        for child in children { node.append(child) }
        context.current.append(node)
        return true
    }

    /// Returns the token at `index` as a `MarkdownToken`, or `nil` when the
    /// index is past the end or the token has a different concrete type.
    private func markdownToken(at index: Int, in tokens: [any CodeToken<MarkdownTokenElement>]) -> MarkdownToken? {
        guard index < tokens.count else { return nil }
        return tokens[index] as? MarkdownToken
    }

    /// Whether three consecutive colon tokens start at `index`.
    private func isFence(at index: Int, in tokens: [any CodeToken<MarkdownTokenElement>]) -> Bool {
        guard index + 2 < tokens.count else { return false }
        for offset in 0..<3 {
            guard let token = tokens[index + offset] as? MarkdownToken,
                  token.element == .colon else { return false }
        }
        return true
    }

    /// Whether the context's current position is at the start of a line.
    private func isStartOfLine(_ context: CodeContext<MarkdownNodeElement, MarkdownTokenElement>) -> Bool {
        isStartOfLine(index: context.consuming, tokens: context.tokens)
    }

    /// Whether `index` is the first token of the stream or directly follows a newline token.
    private func isStartOfLine(index: Int, tokens: [any CodeToken<MarkdownTokenElement>]) -> Bool {
        guard index != 0 else { return true }
        guard index - 1 < tokens.count,
              let previous = tokens[index - 1] as? MarkdownToken else { return false }
        return previous.element == .newline
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import Foundation

/// Builds definition-list items of the form
///
///     Term
///     : Definition
///
/// i.e. one line of term tokens, a newline, then a line that starts with a
/// colon token (optionally followed by a single space token) holding the
/// definition. Consecutive items are appended to the same
/// `DefinitionListNode`, which is tracked in
/// `MarkdownContextState.currentDefinitionList`.
public class MarkdownDefinitionListBuilder: CodeNodeBuilder {
    public init() {}

    /// Tries to build one term/definition pair starting at `context.consuming`.
    ///
    /// - Parameter context: Parsing context; on success `consuming` is moved
    ///   past the definition line (including its trailing newline, if any).
    /// - Returns: `true` when an item was built. When the current line is not
    ///   followed by a `:` line, returns `false`, leaves `consuming` untouched
    ///   and clears `currentDefinitionList` so a later item starts a new list.
    public func build(from context: inout CodeContext<MarkdownNodeElement, MarkdownTokenElement>) -> Bool {
        guard context.consuming < context.tokens.count,
              isStartOfLine(context) else { return false }
        let state = context.state as? MarkdownContextState ?? MarkdownContextState()
        if context.state == nil { context.state = state }

        // Term: every token up to the end of the current line.
        var idx = context.consuming
        let termTokens = collectLine(from: &idx, in: context.tokens)

        // The term line must end in a newline token. A single safe binding
        // replaces the previous `as?` check followed by a force cast (`as!`).
        guard idx < context.tokens.count,
              let termEnd = context.tokens[idx] as? MarkdownToken,
              termEnd.element == .newline else {
            state.currentDefinitionList = nil
            return false
        }
        idx += 1

        // The following line must start with a colon token.
        guard idx < context.tokens.count,
              let colon = context.tokens[idx] as? MarkdownToken,
              colon.element == .colon else {
            state.currentDefinitionList = nil
            return false
        }
        idx += 1

        // Skip one optional space token after the colon.
        if idx < context.tokens.count,
           let space = context.tokens[idx] as? MarkdownToken,
           space.element == .space {
            idx += 1
        }

        // Definition: the rest of the colon line.
        let defTokens = collectLine(from: &idx, in: context.tokens)
        context.consuming = idx
        if idx < context.tokens.count,
           let lineEnd = context.tokens[idx] as? MarkdownToken,
           lineEnd.element == .newline {
            context.consuming += 1
        }

        // Term and definition are each parsed as inline content.
        var termContext = CodeContext(current: DocumentNode(), tokens: termTokens)
        let termChildren = MarkdownInlineParser.parseInline(&termContext)
        var defContext = CodeContext(current: DocumentNode(), tokens: defTokens)
        let defChildren = MarkdownInlineParser.parseInline(&defContext)

        let item = DefinitionItemNode()
        let termNode = DefinitionTermNode()
        for child in termChildren { termNode.append(child) }
        let descNode = DefinitionDescriptionNode()
        for child in defChildren { descNode.append(child) }
        item.append(termNode)
        item.append(descNode)

        // Extend the list that is currently open, or open a new one.
        if let list = state.currentDefinitionList {
            list.append(item)
        } else {
            let list = DefinitionListNode()
            list.append(item)
            context.current.append(list)
            state.currentDefinitionList = list
        }
        return true
    }

    /// Collects tokens from `idx` up to, but not including, the next newline
    /// token. Stops early at the end of the stream or at a token that is not
    /// a `MarkdownToken`. `idx` is left on the token that stopped the scan.
    private func collectLine(
        from idx: inout Int,
        in tokens: [any CodeToken<MarkdownTokenElement>]
    ) -> [any CodeToken<MarkdownTokenElement>] {
        var line: [any CodeToken<MarkdownTokenElement>] = []
        while idx < tokens.count,
              let token = tokens[idx] as? MarkdownToken,
              token.element != .newline {
            line.append(token)
            idx += 1
        }
        return line
    }

    /// Whether the current position is the first token or directly follows a newline token.
    private func isStartOfLine(_ context: CodeContext<MarkdownNodeElement, MarkdownTokenElement>) -> Bool {
        if context.consuming == 0 { return true }
        if let prev = context.tokens[context.consuming - 1] as? MarkdownToken {
            return prev.element == .newline
        }
        return false
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import Foundation

/// Builds a `CodeBlockNode` from a single `.fencedCodeBlock` token that
/// appears at the start of a line.
public class MarkdownFencedCodeBuilder: CodeNodeBuilder {
    public init() {}

    /// Tries to build a fenced code block at `context.consuming`.
    ///
    /// - Parameter context: Parsing context; on success the fenced-code token
    ///   and one directly following newline token (if present) are consumed.
    /// - Returns: `true` when a code block node was appended to `context.current`.
    public func build(from context: inout CodeContext<MarkdownNodeElement, MarkdownTokenElement>) -> Bool {
        guard context.consuming < context.tokens.count,
              let token = context.tokens[context.consuming] as? MarkdownToken,
              token.element == .fencedCodeBlock,
              isStartOfLine(context) else { return false }
        context.consuming += 1
        let code = trimFence(token.text)
        // NOTE(review): the info string on the opening fence line is discarded,
        // so `language` is always nil — confirm whether that is intended.
        let node = CodeBlockNode(source: code, language: nil)
        context.current.append(node)
        if context.consuming < context.tokens.count,
           let nl = context.tokens[context.consuming] as? MarkdownToken,
           nl.element == .newline {
            context.consuming += 1
        }
        return true
    }

    /// Strips the opening fence line and, when present, the closing fence
    /// line from the raw token text.
    ///
    /// Blank lines inside the block are preserved: the text is split with
    /// `omittingEmptySubsequences: false`, because the default (`true`)
    /// silently drops every empty line of the code.
    ///
    /// Text that contains no newline at all is returned unchanged. An opening
    /// fence followed only by blank lines yields an empty string.
    private func trimFence(_ text: String) -> String {
        var lines = text.split(separator: "\n", omittingEmptySubsequences: false)
        guard lines.count >= 2 else { return text }
        lines.removeFirst()
        // Trailing newlines after the last line produce empty trailing
        // elements; drop them so the closing fence can be recognised.
        while let last = lines.last, last.isEmpty {
            lines.removeLast()
        }
        if let last = lines.last, last.starts(with: "```") {
            lines.removeLast()
        }
        return lines.joined(separator: "\n")
    }

    /// Whether the current position is the first token or directly follows a newline token.
    private func isStartOfLine(_ context: CodeContext<MarkdownNodeElement, MarkdownTokenElement>) -> Bool {
        if context.consuming == 0 { return true }
        if let prev = context.tokens[context.consuming - 1] as? MarkdownToken {
            return prev.element == .newline
        }
        return false
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import Foundation

/// Builds a `FormulaBlockNode` from a single `.formulaBlock` token.
public class MarkdownFormulaBlockBuilder: CodeNodeBuilder {
    public init() {}

    /// Tries to build a formula block at `context.consuming`.
    ///
    /// - Parameter context: Parsing context; on success the formula token and
    ///   one directly following newline token (if present) are consumed.
    /// - Returns: `true` when a formula node was appended to `context.current`.
    public func build(from context: inout CodeContext<MarkdownNodeElement, MarkdownTokenElement>) -> Bool {
        guard context.consuming < context.tokens.count else { return false }
        guard let formulaToken = context.tokens[context.consuming] as? MarkdownToken,
              formulaToken.element == .formulaBlock else { return false }

        context.consuming += 1
        context.current.append(FormulaBlockNode(expression: trimFormula(formulaToken.text)))

        // Swallow the newline that directly follows the formula, if any.
        if context.consuming < context.tokens.count,
           let next = context.tokens[context.consuming] as? MarkdownToken,
           next.element == .newline {
            context.consuming += 1
        }
        return true
    }

    /// Removes a leading/trailing `$$` and then a leading/trailing `\[` / `\]`
    /// from `text`, and trims surrounding whitespace and newlines.
    private func trimFormula(_ text: String) -> String {
        let delimiters: [(opening: String, closing: String)] = [
            ("$$", "$$"),
            ("\\[", "\\]"),
        ]
        var expression = text
        for pair in delimiters {
            if expression.hasPrefix(pair.opening) { expression.removeFirst(pair.opening.count) }
            if expression.hasSuffix(pair.closing) { expression.removeLast(pair.closing.count) }
        }
        return expression.trimmingCharacters(in: .whitespacesAndNewlines)
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import Foundation

/// Builds an `HTMLBlockNode` from a single `.htmlBlock` or
/// `.htmlUnclosedBlock` token.
public class MarkdownHTMLBlockBuilder: CodeNodeBuilder {
    public init() {}

    /// Tries to build an HTML block at `context.consuming`.
    ///
    /// The node is created with an empty `name` and the token's raw text as
    /// its `content`.
    ///
    /// - Parameter context: Parsing context; on success the HTML token and
    ///   one directly following newline token (if present) are consumed.
    /// - Returns: `true` when an HTML block node was appended to `context.current`.
    public func build(from context: inout CodeContext<MarkdownNodeElement, MarkdownTokenElement>) -> Bool {
        guard context.consuming < context.tokens.count,
              let htmlToken = context.tokens[context.consuming] as? MarkdownToken else { return false }

        switch htmlToken.element {
        case .htmlBlock, .htmlUnclosedBlock:
            break
        default:
            return false
        }

        context.consuming += 1
        context.current.append(HTMLBlockNode(name: "", content: htmlToken.text))

        // Swallow the newline that directly follows the block, if any.
        if context.consuming < context.tokens.count,
           let next = context.tokens[context.consuming] as? MarkdownToken,
           next.element == .newline {
            context.consuming += 1
        }
        return true
    }
}
43 changes: 38 additions & 5 deletions Sources/SwiftParser/Markdown/Builders/MarkdownInlineParser.swift
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ struct MarkdownInlineParser {
if stopAt.contains(token.element) { break }

switch token.element {
case .asterisk, .underscore:
case .asterisk, .underscore, .tilde:
let marker = token.element
var count = 0
while context.consuming < context.tokens.count,
Expand All @@ -22,7 +22,12 @@ struct MarkdownInlineParser {
count += 1
context.consuming += 1
}
handleDelimiter(marker: marker, count: count, nodes: &nodes, stack: &delimiters)
if marker == .tilde && count < 2 {
let text = String(repeating: "~", count: count)
nodes.append(TextNode(content: text))
} else {
handleDelimiter(marker: marker, count: count, nodes: &nodes, stack: &delimiters)
}
case .inlineCode:
nodes.append(InlineCodeNode(code: trimBackticks(token.text)))
context.consuming += 1
Expand Down Expand Up @@ -90,7 +95,14 @@ struct MarkdownInlineParser {

while remaining > 0, let openIdx = stack.lastIndex(where: { $0.marker == marker }) {
let open = stack.remove(at: openIdx)
let closeCount = min(open.count, remaining)
var closeCount = min(open.count, remaining)
if marker == .tilde {
guard open.count >= 2 && remaining >= 2 else {
stack.append(open)
break
}
closeCount = 2
}

let start = open.index + 1
let removedCount = nodes.count - open.index
Expand All @@ -102,7 +114,12 @@ struct MarkdownInlineParser {
}
}

let node: MarkdownNodeBase = (closeCount >= 2) ? StrongNode(content: "") : EmphasisNode(content: "")
let node: MarkdownNodeBase
if marker == .tilde {
node = StrikeNode(content: "")
} else {
node = (closeCount >= 2) ? StrongNode(content: "") : EmphasisNode(content: "")
}
for child in content { node.append(child) }
nodes.append(node)

Expand All @@ -119,7 +136,7 @@ struct MarkdownInlineParser {
private static func parseLinkOrFootnote(_ context: inout CodeContext<MarkdownNodeElement, MarkdownTokenElement>) -> MarkdownNodeBase? {
let start = context.consuming
context.consuming += 1
// Footnote reference [^id]
// Footnote reference [^id] or citation [@id]
if context.consuming < context.tokens.count,
let caret = context.tokens[context.consuming] as? MarkdownToken,
caret.element == .caret {
Expand All @@ -136,6 +153,22 @@ struct MarkdownInlineParser {
rb.element == .rightBracket else { context.consuming = start; return nil }
context.consuming += 1
return FootnoteNode(identifier: ident, content: "", referenceText: nil, range: rb.range)
} else if context.consuming < context.tokens.count,
let at = context.tokens[context.consuming] as? MarkdownToken,
at.element == .text, at.text == "@" {
context.consuming += 1
var ident = ""
while context.consuming < context.tokens.count,
let t = context.tokens[context.consuming] as? MarkdownToken,
t.element != .rightBracket {
ident += t.text
context.consuming += 1
}
guard context.consuming < context.tokens.count,
let rb = context.tokens[context.consuming] as? MarkdownToken,
rb.element == .rightBracket else { context.consuming = start; return nil }
context.consuming += 1
return CitationReferenceNode(identifier: ident)
}

let textNodes = parseInline(&context, stopAt: [.rightBracket])
Expand Down
Loading