Skip to content

Commit 42f4767

Browse files
authored
Add block element support (#46)
* Add CommonMark block elements and GFM extensions * Update feature list and roadmap * Remove duplicated roadmap entry * Add definition lists and admonition support
1 parent 02caaae commit 42f4767

16 files changed

+697
-11
lines changed

MARKDOWN_PARSER.md

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,12 @@ This document provides an overview of the Markdown parser built on top of the Sw
1212
- ✅ Fenced code blocks (```code```)
1313
- ✅ Block quotes (> quote) with multi-line merging
1414
- ✅ Lists (ordered and unordered) with automatic numbering
15-
- ✅ Task lists (- [ ] unchecked, - [x] checked) – GFM extension
1615
- ✅ Links ([text](URL) and reference style)
1716
- ✅ Images (![alt](URL))
1817
- ✅ Autolinks (<URL>)
1918
- ✅ Horizontal rules (---)
2019
- ✅ HTML inline elements
20+
- ✅ HTML block elements
2121
- ✅ Line break handling
2222

2323
### GitHub Flavored Markdown (GFM) Extensions
@@ -28,6 +28,12 @@ This document provides an overview of the Markdown parser built on top of the Sw
2828
### Academic Extensions
2929
- **Footnotes**: Definition and reference support ([^1]: footnote, [^1])
3030
- **Citations**: Academic citation support ([@author2023]: reference, [@author2023])
31+
- **Math formulas**: inline ($math$) and block ($$math$$)
32+
33+
### Other Extensions
34+
- **Definition lists**: term/definition pairs
35+
- **Admonitions**: note/warning/info blocks using `:::`
36+
- **Custom containers**: generic container syntax (`:::`)
3137

3238
### Advanced List Features
3339
- **Unordered lists**: supports `-`, `*`, `+` markers
@@ -657,11 +663,10 @@ When reporting bugs, include:
657663
## Future Roadmap
658664

659665
### Planned Features
660-
- [ ] **Math Support**: LaTeX-style math expressions (`$inline$`, `$$block$$`)
661-
- [ ] **Definition Lists**: Support for definition list syntax
662-
- [ ] **Admonitions**: Support for warning/info/note blocks
666+
- [x] **Definition Lists**: Support for definition list syntax
667+
- [x] **Admonitions**: Support for warning/info/note blocks
663668
- [ ] **Mermaid Diagrams**: Inline diagram support
664-
- [ ] **Custom Containers**: Generic container syntax (:::)
669+
- [x] **Custom Containers**: Generic container syntax (:::)
665670
- [ ] **Syntax Highlighting**: Code block syntax highlighting
666671
- [ ] **Export Formats**: HTML, PDF, and other output formats
667672

@@ -690,4 +695,4 @@ This project is licensed under the MIT License - see the LICENSE file for detail
690695

691696
---
692697

693-
*Last updated: 2025-07-18*
698+
*Last updated: 2025-07-20*
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import Foundation
2+
3+
/// Builds admonition and generic container nodes from `:::`-fenced blocks.
///
/// A block opens with three consecutive colon tokens at the start of a line.
/// The remaining tokens on that line form the block name. Content runs until
/// the next line that begins with three colon tokens, or until the tokens run
/// out if no such line exists.
public class MarkdownAdmonitionBuilder: CodeNodeBuilder {
    public init() {}

    /// Attempts to build a `:::` block at the current position.
    ///
    /// - Parameter context: Parsing context; on success `consuming` is moved
    ///   past the block and the new node is appended to `context.current`.
    /// - Returns: `true` when a block was built; `false` (with `context`
    ///   untouched) when the position does not start a `:::` block or the
    ///   opening line is not terminated by a newline token.
    public func build(from context: inout CodeContext<MarkdownNodeElement, MarkdownTokenElement>) -> Bool {
        let tokens = context.tokens
        let start = context.consuming

        // The fence check is bounds-safe, so it doubles as the length guard.
        guard hasColonFence(at: start, in: tokens),
              startsLine(at: start, in: tokens) else { return false }

        // Collect the raw block name: everything after the fence up to the newline.
        var cursor = start + 3
        var rawName = ""
        while let token = markdownToken(at: cursor, in: tokens), token.element != .newline {
            rawName += token.text
            cursor += 1
        }

        // The opening line must end with a newline token; otherwise this is not a block.
        guard let lineEnd = markdownToken(at: cursor, in: tokens),
              lineEnd.element == .newline else { return false }
        cursor += 1

        let name = rawName.trimmingCharacters(in: .whitespaces)

        // Gather content tokens until a line starting with a colon fence is found.
        var body: [any CodeToken<MarkdownTokenElement>] = []
        while cursor < tokens.count {
            if startsLine(at: cursor, in: tokens), hasColonFence(at: cursor, in: tokens) {
                // Skip the closing fence, the rest of its line, and its newline.
                cursor += 3
                while let token = markdownToken(at: cursor, in: tokens), token.element != .newline {
                    cursor += 1
                }
                if let token = markdownToken(at: cursor, in: tokens), token.element == .newline {
                    cursor += 1
                }
                break
            }
            body.append(tokens[cursor])
            cursor += 1
        }
        context.consuming = cursor

        // Content is parsed with the inline parser in its own sub-context.
        var inner = CodeContext(current: DocumentNode(), tokens: body)
        let children = MarkdownInlineParser.parseInline(&inner)

        // Known kinds (matched case-insensitively) become admonitions; any
        // other name becomes a custom container keeping the name as written.
        let kind = name.lowercased()
        let block: MarkdownNodeBase
        switch kind {
        case "note", "warning", "info":
            block = AdmonitionNode(kind: kind)
        default:
            block = CustomContainerNode(name: name)
        }
        for child in children { block.append(child) }

        context.current.append(block)
        return true
    }

    /// Returns the token at `index` as a `MarkdownToken`, or `nil` when the
    /// index is past the end or the token is of another type.
    private func markdownToken(at index: Int, in tokens: [any CodeToken<MarkdownTokenElement>]) -> MarkdownToken? {
        guard index < tokens.count else { return nil }
        return tokens[index] as? MarkdownToken
    }

    /// Whether three consecutive colon tokens begin at `index`.
    private func hasColonFence(at index: Int, in tokens: [any CodeToken<MarkdownTokenElement>]) -> Bool {
        guard let first = markdownToken(at: index, in: tokens),
              let second = markdownToken(at: index + 1, in: tokens),
              let third = markdownToken(at: index + 2, in: tokens) else { return false }
        return first.element == .colon && second.element == .colon && third.element == .colon
    }

    /// Whether `index` is the first token overall or directly follows a newline token.
    private func startsLine(at index: Int, in tokens: [any CodeToken<MarkdownTokenElement>]) -> Bool {
        if index == 0 { return true }
        guard index - 1 < tokens.count,
              let previous = tokens[index - 1] as? MarkdownToken else { return false }
        return previous.element == .newline
    }
}
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
import Foundation
2+
3+
/// Builds definition-list items from a term line followed by a definition line.
///
/// The recognised shape is one line of term tokens, a newline, then a line
/// starting with a colon token (optionally followed by one space token) whose
/// remaining tokens form the definition.
public class MarkdownDefinitionListBuilder: CodeNodeBuilder {
    public init() {}

    /// Attempts to build one term/definition pair at the current position.
    ///
    /// The list currently being extended is tracked in
    /// `MarkdownContextState.currentDefinitionList`: a successful match appends
    /// to that list (creating and registering it if absent), and a failed match
    /// after the start-of-line check clears it.
    ///
    /// - Parameter context: Parsing context; on success `consuming` is moved
    ///   past the definition line and its trailing newline, if present.
    /// - Returns: `true` when an item was built; `false` (without moving
    ///   `consuming`) otherwise.
    public func build(from context: inout CodeContext<MarkdownNodeElement, MarkdownTokenElement>) -> Bool {
        guard context.consuming < context.tokens.count,
              isStartOfLine(context) else { return false }
        // Reuse the shared parser state when present; install a fresh one only
        // when the context has no state at all.
        let state = context.state as? MarkdownContextState ?? MarkdownContextState()
        if context.state == nil { context.state = state }

        // Term: every token on the current line.
        var idx = context.consuming
        var termTokens: [any CodeToken<MarkdownTokenElement>] = []
        while idx < context.tokens.count,
              let t = context.tokens[idx] as? MarkdownToken,
              t.element != .newline {
            termTokens.append(t)
            idx += 1
        }

        // The term line must end with a newline token. A single optional
        // binding replaces the former `let _ = … as?` + `as!` pair.
        guard idx < context.tokens.count,
              let lineBreak = context.tokens[idx] as? MarkdownToken,
              lineBreak.element == .newline else {
            state.currentDefinitionList = nil
            return false
        }
        idx += 1

        // The following line must open with a colon token.
        guard idx < context.tokens.count,
              let colon = context.tokens[idx] as? MarkdownToken,
              colon.element == .colon else {
            state.currentDefinitionList = nil
            return false
        }
        idx += 1

        // Skip a single space token after the colon, if there is one.
        if idx < context.tokens.count,
           let sp = context.tokens[idx] as? MarkdownToken,
           sp.element == .space {
            idx += 1
        }

        // Definition: the rest of the line.
        var defTokens: [any CodeToken<MarkdownTokenElement>] = []
        while idx < context.tokens.count,
              let t = context.tokens[idx] as? MarkdownToken,
              t.element != .newline {
            defTokens.append(t)
            idx += 1
        }
        context.consuming = idx
        // Consume the newline that ends the definition line, when present.
        if idx < context.tokens.count,
           let nl = context.tokens[idx] as? MarkdownToken,
           nl.element == .newline {
            context.consuming += 1
        }

        // Term and definition are each parsed by the inline parser in isolation.
        var termContext = CodeContext(current: DocumentNode(), tokens: termTokens)
        let termChildren = MarkdownInlineParser.parseInline(&termContext)
        var defContext = CodeContext(current: DocumentNode(), tokens: defTokens)
        let defChildren = MarkdownInlineParser.parseInline(&defContext)

        let item = DefinitionItemNode()
        let termNode = DefinitionTermNode()
        for c in termChildren { termNode.append(c) }
        let descNode = DefinitionDescriptionNode()
        for c in defChildren { descNode.append(c) }
        item.append(termNode)
        item.append(descNode)

        // Extend the list in progress, or start a new one and remember it.
        if let list = state.currentDefinitionList {
            list.append(item)
        } else {
            let list = DefinitionListNode()
            list.append(item)
            context.current.append(list)
            state.currentDefinitionList = list
        }
        return true
    }

    /// Whether the current position is the first token or directly follows a newline token.
    private func isStartOfLine(_ context: CodeContext<MarkdownNodeElement, MarkdownTokenElement>) -> Bool {
        if context.consuming == 0 { return true }
        if let prev = context.tokens[context.consuming - 1] as? MarkdownToken {
            return prev.element == .newline
        }
        return false
    }
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import Foundation
2+
3+
/// Builds a `CodeBlockNode` from a single `.fencedCodeBlock` token.
public class MarkdownFencedCodeBuilder: CodeNodeBuilder {
    public init() {}

    /// Attempts to build a fenced code block at the current position.
    ///
    /// - Parameter context: Parsing context; on success the token and one
    ///   directly following newline token (if any) are consumed.
    /// - Returns: `true` when the current token is a `.fencedCodeBlock` at the
    ///   start of a line; `false` (with `context` untouched) otherwise.
    public func build(from context: inout CodeContext<MarkdownNodeElement, MarkdownTokenElement>) -> Bool {
        guard context.consuming < context.tokens.count,
              let token = context.tokens[context.consuming] as? MarkdownToken,
              token.element == .fencedCodeBlock,
              isStartOfLine(context) else { return false }
        context.consuming += 1
        let code = trimFence(token.text)
        // NOTE(review): the opening fence line (including any info string) is
        // discarded by `trimFence`, so `language` is always nil here.
        let node = CodeBlockNode(source: code, language: nil)
        context.current.append(node)
        if context.consuming < context.tokens.count,
           let nl = context.tokens[context.consuming] as? MarkdownToken,
           nl.element == .newline {
            context.consuming += 1
        }
        return true
    }

    /// Strips the opening fence line and, if present, a closing ``` line.
    ///
    /// Blank lines inside the block are preserved. Text with fewer than two
    /// non-empty-edged lines is returned unchanged.
    private func trimFence(_ text: String) -> String {
        // Keep empty subsequences: the default split drops them, which would
        // delete every blank line from the code.
        var lines = text.split(separator: "\n", omittingEmptySubsequences: false)
        // Leading/trailing empty pieces only reflect newlines at the very
        // edges of the token text; drop them so the fence lines are first/last.
        while lines.first?.isEmpty == true { lines.removeFirst() }
        while lines.last?.isEmpty == true { lines.removeLast() }
        guard lines.count >= 2 else { return text }
        lines.removeFirst()
        if let last = lines.last, last.starts(with: "```") {
            lines.removeLast()
        }
        return lines.joined(separator: "\n")
    }

    /// Whether the current position is the first token or directly follows a newline token.
    private func isStartOfLine(_ context: CodeContext<MarkdownNodeElement, MarkdownTokenElement>) -> Bool {
        if context.consuming == 0 { return true }
        if let prev = context.tokens[context.consuming - 1] as? MarkdownToken {
            return prev.element == .newline
        }
        return false
    }
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import Foundation
2+
3+
/// Builds a `FormulaBlockNode` from a single `.formulaBlock` token.
public class MarkdownFormulaBlockBuilder: CodeNodeBuilder {
    public init() {}

    /// Attempts to build a block formula at the current position.
    ///
    /// - Parameter context: Parsing context; on success the token and one
    ///   directly following newline token (if any) are consumed.
    /// - Returns: `true` when the current token is a `.formulaBlock`;
    ///   `false` (with `context` untouched) otherwise.
    public func build(from context: inout CodeContext<MarkdownNodeElement, MarkdownTokenElement>) -> Bool {
        let position = context.consuming
        guard position < context.tokens.count,
              let formula = context.tokens[position] as? MarkdownToken,
              formula.element == .formulaBlock else { return false }

        context.consuming = position + 1
        context.current.append(FormulaBlockNode(expression: trimFormula(formula.text)))

        // Swallow a newline token that immediately follows the formula.
        let next = context.consuming
        if next < context.tokens.count,
           let trailing = context.tokens[next] as? MarkdownToken,
           trailing.element == .newline {
            context.consuming = next + 1
        }
        return true
    }

    /// Removes `$$ … $$` and then `\[ … \]` delimiters, each side checked
    /// independently, and trims surrounding whitespace and newlines.
    private func trimFormula(_ text: String) -> String {
        let delimiters: [(open: String, close: String)] = [("$$", "$$"), ("\\[", "\\]")]
        var expression = Substring(text)
        for pair in delimiters {
            if expression.hasPrefix(pair.open) {
                expression = expression.dropFirst(pair.open.count)
            }
            if expression.hasSuffix(pair.close) {
                expression = expression.dropLast(pair.close.count)
            }
        }
        return expression.trimmingCharacters(in: .whitespacesAndNewlines)
    }
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import Foundation
2+
3+
/// Builds an `HTMLBlockNode` from a single HTML block token.
public class MarkdownHTMLBlockBuilder: CodeNodeBuilder {
    public init() {}

    /// Attempts to build an HTML block at the current position.
    ///
    /// The node is created with an empty name and the token's text as content.
    ///
    /// - Parameter context: Parsing context; on success the token and one
    ///   directly following newline token (if any) are consumed.
    /// - Returns: `true` when the current token is `.htmlBlock` or
    ///   `.htmlUnclosedBlock`; `false` (with `context` untouched) otherwise.
    public func build(from context: inout CodeContext<MarkdownNodeElement, MarkdownTokenElement>) -> Bool {
        let position = context.consuming
        guard position < context.tokens.count,
              let html = context.tokens[position] as? MarkdownToken else { return false }

        switch html.element {
        case .htmlBlock, .htmlUnclosedBlock:
            break
        default:
            return false
        }

        context.consuming = position + 1
        context.current.append(HTMLBlockNode(name: "", content: html.text))

        // Swallow a newline token that immediately follows the block.
        let next = context.consuming
        if next < context.tokens.count,
           let trailing = context.tokens[next] as? MarkdownToken,
           trailing.element == .newline {
            context.consuming = next + 1
        }
        return true
    }
}

Sources/SwiftParser/Markdown/Builders/MarkdownInlineParser.swift

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ struct MarkdownInlineParser {
1313
if stopAt.contains(token.element) { break }
1414

1515
switch token.element {
16-
case .asterisk, .underscore:
16+
case .asterisk, .underscore, .tilde:
1717
let marker = token.element
1818
var count = 0
1919
while context.consuming < context.tokens.count,
@@ -22,7 +22,12 @@ struct MarkdownInlineParser {
2222
count += 1
2323
context.consuming += 1
2424
}
25-
handleDelimiter(marker: marker, count: count, nodes: &nodes, stack: &delimiters)
25+
if marker == .tilde && count < 2 {
26+
let text = String(repeating: "~", count: count)
27+
nodes.append(TextNode(content: text))
28+
} else {
29+
handleDelimiter(marker: marker, count: count, nodes: &nodes, stack: &delimiters)
30+
}
2631
case .inlineCode:
2732
nodes.append(InlineCodeNode(code: trimBackticks(token.text)))
2833
context.consuming += 1
@@ -90,7 +95,14 @@ struct MarkdownInlineParser {
9095

9196
while remaining > 0, let openIdx = stack.lastIndex(where: { $0.marker == marker }) {
9297
let open = stack.remove(at: openIdx)
93-
let closeCount = min(open.count, remaining)
98+
var closeCount = min(open.count, remaining)
99+
if marker == .tilde {
100+
guard open.count >= 2 && remaining >= 2 else {
101+
stack.append(open)
102+
break
103+
}
104+
closeCount = 2
105+
}
94106

95107
let start = open.index + 1
96108
let removedCount = nodes.count - open.index
@@ -102,7 +114,12 @@ struct MarkdownInlineParser {
102114
}
103115
}
104116

105-
let node: MarkdownNodeBase = (closeCount >= 2) ? StrongNode(content: "") : EmphasisNode(content: "")
117+
let node: MarkdownNodeBase
118+
if marker == .tilde {
119+
node = StrikeNode(content: "")
120+
} else {
121+
node = (closeCount >= 2) ? StrongNode(content: "") : EmphasisNode(content: "")
122+
}
106123
for child in content { node.append(child) }
107124
nodes.append(node)
108125

@@ -119,7 +136,7 @@ struct MarkdownInlineParser {
119136
private static func parseLinkOrFootnote(_ context: inout CodeContext<MarkdownNodeElement, MarkdownTokenElement>) -> MarkdownNodeBase? {
120137
let start = context.consuming
121138
context.consuming += 1
122-
// Footnote reference [^id]
139+
// Footnote reference [^id] or citation [@id]
123140
if context.consuming < context.tokens.count,
124141
let caret = context.tokens[context.consuming] as? MarkdownToken,
125142
caret.element == .caret {
@@ -136,6 +153,22 @@ struct MarkdownInlineParser {
136153
rb.element == .rightBracket else { context.consuming = start; return nil }
137154
context.consuming += 1
138155
return FootnoteNode(identifier: ident, content: "", referenceText: nil, range: rb.range)
156+
} else if context.consuming < context.tokens.count,
157+
let at = context.tokens[context.consuming] as? MarkdownToken,
158+
at.element == .text, at.text == "@" {
159+
context.consuming += 1
160+
var ident = ""
161+
while context.consuming < context.tokens.count,
162+
let t = context.tokens[context.consuming] as? MarkdownToken,
163+
t.element != .rightBracket {
164+
ident += t.text
165+
context.consuming += 1
166+
}
167+
guard context.consuming < context.tokens.count,
168+
let rb = context.tokens[context.consuming] as? MarkdownToken,
169+
rb.element == .rightBracket else { context.consuming = start; return nil }
170+
context.consuming += 1
171+
return CitationReferenceNode(identifier: ident)
139172
}
140173

141174
let textNodes = parseInline(&context, stopAt: [.rightBracket])

0 commit comments

Comments
 (0)