Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
255 changes: 204 additions & 51 deletions Sources/SwiftParser/Languages/MarkdownLanguage.swift
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ public struct MarkdownLanguage: CodeLanguage {
case text
case listItem
case orderedListItem
case unorderedList
case orderedList
case emphasis
case strong
case codeBlock
Expand Down Expand Up @@ -402,88 +404,239 @@ public struct MarkdownLanguage: CodeLanguage {
}
}

public class ListItemBuilder: CodeElementBuilder {
// MARK: - List Parsing

public class UnorderedListBuilder: CodeElementBuilder {
public init() {}
public func accept(context: CodeContext, token: any CodeToken) -> Bool {
guard let tok = token as? Token else { return false }
switch tok {
case .dash, .star, .plus:
if context.index + 1 < context.tokens.count,
let next = context.tokens[context.index + 1] as? Token,
case .text(let s, _) = next,
s.first?.isWhitespace == true {
if context.index == 0 { return true }
if let prev = context.tokens[context.index - 1] as? Token, case .newline = prev {
return true
}

private func lineIndent(before idx: Int, in context: CodeContext) -> Int? {
if idx == 0 { return 0 }
var i = idx - 1
var count = 0
while i >= 0 {
guard let tok = context.tokens[i] as? Token else { return nil }
switch tok {
case .newline:
return count
case .text(let s, _) where s.allSatisfy({ $0 == " " }):
count += s.count
i -= 1
default:
return nil
}
default:
break
}
return false
return count
}

private func isBullet(_ tok: Token) -> Bool {
switch tok {
case .dash, .star, .plus: return true
default: return false
}
}

public func accept(context: CodeContext, token: any CodeToken) -> Bool {
guard let tok = token as? Token, isBullet(tok) else { return false }
guard context.index + 1 < context.tokens.count,
let next = context.tokens[context.index + 1] as? Token,
case .text(let s, _) = next, s.first?.isWhitespace == true else {
return false
}
if let ind = lineIndent(before: context.index, in: context) { return ind >= 0 } else { return false }
}

public func build(context: inout CodeContext) {
context.index += 1 // skip bullet
var text = ""
while context.index < context.tokens.count {
if let tok = context.tokens[context.index] as? Token {
func parseList(_ level: Int) -> CodeNode {
let list = CodeNode(type: Element.unorderedList, value: "")
var isLoose = false
while context.index < context.tokens.count {
guard let bullet = context.tokens[context.index] as? Token, isBullet(bullet), lineIndent(before: context.index, in: context) == level else { break }
let (node, loose) = parseItem(level)
if loose { isLoose = true }
list.addChild(node)
}
list.value = isLoose ? "loose" : "tight"
return list
}

func parseItem(_ level: Int) -> (CodeNode, Bool) {
var loose = false
// skip bullet and following whitespace
context.index += 1
if context.index < context.tokens.count,
let t = context.tokens[context.index] as? Token,
case .text(let s, _) = t, s.first?.isWhitespace == true {
context.index += 1
}

let node = CodeNode(type: Element.listItem, value: "")
var text = ""
itemLoop: while context.index < context.tokens.count {
guard let tok = context.tokens[context.index] as? Token else { context.index += 1; continue }
switch tok {
case .newline:
case .newline:
context.index += 1
let node = CodeNode(type: Element.listItem, value: text.trimmingCharacters(in: .whitespaces))
context.currentNode.addChild(node)
return
// Check for blank line
if context.index < context.tokens.count, let nl = context.tokens[context.index] as? Token, case .newline = nl {
loose = true
context.index += 1
}
let start = context.index
var spaces = 0
if start < context.tokens.count, let sTok = context.tokens[start] as? Token, case .text(let s, _) = sTok, s.allSatisfy({ $0 == " " }) {
spaces = s.count
context.index += 1
}
if context.index < context.tokens.count, let next = context.tokens[context.index] as? Token, isBullet(next), spaces > level {
let sub = parseList(spaces)
node.addChild(sub)
if context.index < context.tokens.count, let nextTok = context.tokens[context.index] as? Token, isBullet(nextTok), (lineIndent(before: context.index, in: context) ?? 0) <= level {
break itemLoop
}
} else if context.index < context.tokens.count, let next = context.tokens[context.index] as? Token, isBullet(next), spaces == level {
context.index = start
break itemLoop
} else if spaces > level {
text += "\n"
} else if spaces < level {
context.index = start
break itemLoop
} else {
text += "\n"
}
case .eof:
let node = CodeNode(type: Element.listItem, value: text.trimmingCharacters(in: .whitespaces))
context.currentNode.addChild(node)
context.index += 1
return
break itemLoop
default:
text += tok.text
context.index += 1
}
} else { context.index += 1 }
}
node.value = text.trimmingCharacters(in: .whitespaces)
return (node, loose)
}

if let ind = lineIndent(before: context.index, in: context) {
let list = parseList(ind)
context.currentNode.addChild(list)
}
}
}

public class OrderedListItemBuilder: CodeElementBuilder {
public class OrderedListBuilder: CodeElementBuilder {
public init() {}
public func accept(context: CodeContext, token: any CodeToken) -> Bool {
guard let tok = token as? Token else { return false }
if case .number = tok {
if context.index + 1 < context.tokens.count,
let dot = context.tokens[context.index + 1] as? Token,
case .dot = dot {
if context.index == 0 { return true }
if let prev = context.tokens[context.index - 1] as? Token, case .newline = prev {
return true
}

private func lineIndent(before idx: Int, in context: CodeContext) -> Int? {
if idx == 0 { return 0 }
var i = idx - 1
var count = 0
while i >= 0 {
guard let tok = context.tokens[i] as? Token else { return nil }
switch tok {
case .newline:
return count
case .text(let s, _) where s.allSatisfy({ $0 == " " }):
count += s.count
i -= 1
default:
return nil
}
}
return count
}

public func accept(context: CodeContext, token: any CodeToken) -> Bool {
guard let tok = token as? Token, case .number = tok else { return false }
guard context.index + 1 < context.tokens.count,
let dot = context.tokens[context.index + 1] as? Token, case .dot = dot else { return false }
if let _ = lineIndent(before: context.index, in: context) { return true }
return false
}

public func build(context: inout CodeContext) {
context.index += 2 // skip number and '.'
var text = ""
while context.index < context.tokens.count {
if let tok = context.tokens[context.index] as? Token {
func parseList(_ level: Int) -> CodeNode {
let list = CodeNode(type: Element.orderedList, value: "")
var isLoose = false
while context.index < context.tokens.count {
guard context.index + 1 < context.tokens.count,
let num = context.tokens[context.index] as? Token, case .number = num,
let dot = context.tokens[context.index + 1] as? Token, case .dot = dot,
lineIndent(before: context.index, in: context) == level else { break }
let (node, loose) = parseItem(level)
if loose { isLoose = true }
list.addChild(node)
}
list.value = isLoose ? "loose" : "tight"
return list
}

func parseItem(_ level: Int) -> (CodeNode, Bool) {
var loose = false
context.index += 2
if context.index < context.tokens.count,
let t = context.tokens[context.index] as? Token,
case .text(let s, _) = t, s.first?.isWhitespace == true {
context.index += 1
}

let node = CodeNode(type: Element.orderedListItem, value: "")
var text = ""
itemLoop: while context.index < context.tokens.count {
guard let tok = context.tokens[context.index] as? Token else { context.index += 1; continue }
switch tok {
case .newline:
context.index += 1
let node = CodeNode(type: Element.orderedListItem, value: text.trimmingCharacters(in: .whitespaces))
context.currentNode.addChild(node)
return
if context.index < context.tokens.count, let nl = context.tokens[context.index] as? Token, case .newline = nl {
loose = true
context.index += 1
}
let start = context.index
var spaces = 0
if start < context.tokens.count, let sTok = context.tokens[start] as? Token, case .text(let s, _) = sTok, s.allSatisfy({ $0 == " " }) {
spaces = s.count
context.index += 1
}
if context.index + 1 < context.tokens.count,
let nextNum = context.tokens[context.index] as? Token, case .number = nextNum,
let dot = context.tokens[context.index + 1] as? Token, case .dot = dot,
spaces > level {
let sub = parseList(spaces)
node.addChild(sub)
if context.index + 1 < context.tokens.count,
let nextBullet = context.tokens[context.index] as? Token, case .number = nextBullet,
let ndot = context.tokens[context.index + 1] as? Token, case .dot = ndot,
(lineIndent(before: context.index, in: context) ?? 0) <= level {
break itemLoop
}
} else if context.index + 1 < context.tokens.count,
let nextNum = context.tokens[context.index] as? Token, case .number = nextNum,
let dot = context.tokens[context.index + 1] as? Token, case .dot = dot,
spaces == level {
context.index = start
break itemLoop
} else if spaces > level {
text += "\n"
} else if spaces < level {
context.index = start
break itemLoop
} else {
text += "\n"
}
case .eof:
let node = CodeNode(type: Element.orderedListItem, value: text.trimmingCharacters(in: .whitespaces))
context.currentNode.addChild(node)
context.index += 1
return
break itemLoop
default:
text += tok.text
context.index += 1
}
} else { context.index += 1 }
}
node.value = text.trimmingCharacters(in: .whitespaces)
return (node, loose)
}

if let ind = lineIndent(before: context.index, in: context) {
let list = parseList(ind)
context.currentNode.addChild(list)
}
}
}
Expand Down Expand Up @@ -1216,7 +1369,7 @@ public struct MarkdownLanguage: CodeLanguage {

public var tokenizer: CodeTokenizer { Tokenizer() }
public var builders: [CodeElementBuilder] {
[HeadingBuilder(), SetextHeadingBuilder(), CodeBlockBuilder(), IndentedCodeBlockBuilder(), BlockQuoteBuilder(), ThematicBreakBuilder(), OrderedListItemBuilder(), ListItemBuilder(), ImageBuilder(), HTMLBuilder(), EntityBuilder(), StrikethroughBuilder(), AutoLinkBuilder(), BareAutoLinkBuilder(), TableBuilder(), FootnoteBuilder(), LinkReferenceDefinitionBuilder(), LinkBuilder(), StrongBuilder(), EmphasisBuilder(), InlineCodeBuilder(), ParagraphBuilder()]
[HeadingBuilder(), SetextHeadingBuilder(), CodeBlockBuilder(), IndentedCodeBlockBuilder(), BlockQuoteBuilder(), ThematicBreakBuilder(), OrderedListBuilder(), UnorderedListBuilder(), ImageBuilder(), HTMLBuilder(), EntityBuilder(), StrikethroughBuilder(), AutoLinkBuilder(), BareAutoLinkBuilder(), TableBuilder(), FootnoteBuilder(), LinkReferenceDefinitionBuilder(), LinkBuilder(), StrongBuilder(), EmphasisBuilder(), InlineCodeBuilder(), ParagraphBuilder()]
}
public var expressionBuilders: [CodeExpressionBuilder] { [] }
public var rootElement: any CodeElement { Element.root }
Expand Down
32 changes: 29 additions & 3 deletions Tests/SwiftParserTests/SwiftParserTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -47,16 +47,42 @@ final class SwiftParserTests: XCTestCase {
let source = "- item1\n- item2"
let result = parser.parse(source, language: MarkdownLanguage())
XCTAssertEqual(result.errors.count, 0)
XCTAssertEqual(result.root.children.count, 2)
XCTAssertEqual(result.root.children.first?.type as? MarkdownLanguage.Element, .listItem)
XCTAssertEqual(result.root.children.count, 1)
let list = result.root.children.first
XCTAssertEqual(list?.type as? MarkdownLanguage.Element, .unorderedList)
XCTAssertEqual(list?.children.count, 2)
XCTAssertEqual(list?.children.first?.type as? MarkdownLanguage.Element, .listItem)
}

func testMarkdownOrderedList() {
let parser = SwiftParser()
let source = "1. first\n2. second"
let result = parser.parse(source, language: MarkdownLanguage())
XCTAssertEqual(result.errors.count, 0)
XCTAssertEqual(result.root.children.first?.type as? MarkdownLanguage.Element, .orderedListItem)
let list = result.root.children.first
XCTAssertEqual(list?.type as? MarkdownLanguage.Element, .orderedList)
XCTAssertEqual(list?.children.count, 2)
XCTAssertEqual(list?.children.first?.type as? MarkdownLanguage.Element, .orderedListItem)
}

func testMarkdownNestedList() {
let parser = SwiftParser()
let source = "- item1\n - sub\n- item2"
let result = parser.parse(source, language: MarkdownLanguage())
XCTAssertEqual(result.errors.count, 0)
let list = result.root.children.first
XCTAssertEqual(list?.type as? MarkdownLanguage.Element, .unorderedList)
XCTAssertEqual(list?.children.count, 2)
let sub = list?.children.first?.children.first
XCTAssertEqual(sub?.type as? MarkdownLanguage.Element, .unorderedList)
}

func testMarkdownLooseList() {
let parser = SwiftParser()
let source = "- a\n\n- b"
let result = parser.parse(source, language: MarkdownLanguage())
XCTAssertEqual(result.errors.count, 0)
XCTAssertEqual(result.root.children.first?.value, "loose")
}

func testMarkdownEmphasisAndStrong() {
Expand Down