Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions Sources/SwiftParser/Core/CodeConstructor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@ public class CodeConstructor<Node, Token> where Node: CodeNodeElement, Token: Co
/// - Parameters:
/// - builders: The node builders responsible for producing AST nodes.
/// - state: Factory returning the initial parsing state object.
public init(builders: [any CodeNodeBuilder<Node, Token>], state: @escaping () -> (any CodeConstructState<Node, Token>)?) {
public init(
    builders: [any CodeNodeBuilder<Node, Token>],
    state: @escaping () -> (any CodeConstructState<Node, Token>)?
) {
    // The factory closure is stored as-is rather than invoked here.
    self.state = state
    // Builders are kept in the order the caller supplied them.
    self.builders = builders
}
Expand All @@ -30,11 +33,6 @@ public class CodeConstructor<Node, Token> where Node: CodeNodeElement, Token: Co
var context = CodeConstructContext(current: root, tokens: tokens, state: state())

while context.consuming < context.tokens.count {
// Stop at EOF without recording an error
if let token = context.tokens[context.consuming] as? MarkdownToken,
token.element == .eof {
break
}

var matched = false
for node in builders {
Expand Down
8 changes: 8 additions & 0 deletions Sources/SwiftParser/Core/CodeLanguage.swift
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,12 @@ public protocol CodeLanguage<Node, Token> where Node: CodeNodeElement, Token: Co

/// The function that creates the initial context for tokenization.
func state() -> (any CodeTokenState<Token>)?

/// Provide an EOF token if the language requires one.
/// - Parameter range: The range where the EOF token should be inserted.
func eofToken(at range: Range<String.Index>) -> (any CodeToken<Token>)?
}

extension CodeLanguage {
    /// Default implementation for languages that do not supply an EOF token.
    /// - Parameter range: The range where an EOF token would be inserted; unused here.
    /// - Returns: Always `nil`.
    public func eofToken(at range: Range<String.Index>) -> (any CodeToken<Token>)? {
        return nil
    }
}
11 changes: 9 additions & 2 deletions Sources/SwiftParser/Core/CodeParser.swift
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,15 @@ public class CodeParser<Node: CodeNodeElement, Token: CodeTokenElement> where No

public init(language: any CodeLanguage<Node, Token>) {
self.language = language
self.tokenizer = CodeTokenizer(builders: language.tokens, state: language.state)
self.constructor = CodeConstructor(builders: language.nodes, state: language.state)
self.tokenizer = CodeTokenizer(
builders: language.tokens,
state: language.state,
eofTokenFactory: { language.eofToken(at: $0) }
)
self.constructor = CodeConstructor(
builders: language.nodes,
state: language.state
)
}

/// Parse a source string using the supplied language.
Expand Down
15 changes: 10 additions & 5 deletions Sources/SwiftParser/Core/CodeTokenizer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,16 @@
public class CodeTokenizer<Token> where Token: CodeTokenElement {
private let builders: [any CodeTokenBuilder<Token>]
private var state: () -> (any CodeTokenState<Token>)?
private let eofTokenFactory: ((Range<String.Index>) -> (any CodeToken<Token>)?)?

public init(builders: [any CodeTokenBuilder<Token>], state: @escaping () -> (any CodeTokenState<Token>)?) {
/// Creates a tokenizer.
/// - Parameters:
///   - builders: The token builders stored for use by this tokenizer.
///   - state: Factory closure stored for later use; it is not called during initialization.
///   - eofTokenFactory: Optional closure that maps a range to an EOF token.
///     Defaults to `nil`, meaning no factory is installed.
public init(
    builders: [any CodeTokenBuilder<Token>],
    state: @escaping () -> (any CodeTokenState<Token>)?,
    eofTokenFactory: ((Range<String.Index>) -> (any CodeToken<Token>)?)? = nil
) {
    self.eofTokenFactory = eofTokenFactory
    self.state = state
    self.builders = builders
}

public func tokenize(_ input: String) -> ([any CodeToken<Token>], [CodeError]) {
Expand Down Expand Up @@ -42,10 +48,9 @@ public class CodeTokenizer<Token> where Token: CodeTokenElement {
}
}

// Automatically append EOF token for Markdown
if Token.self == MarkdownTokenElement.self,
let eof = MarkdownToken.eof(at: input.endIndex..<input.endIndex) as? any CodeToken<Token> {
context.tokens.append(eof)
// Append EOF token if provided by the language
if let token = eofTokenFactory?(input.endIndex..<input.endIndex) {
context.tokens.append(token)
}

return (context.tokens, context.errors)
Expand Down
7 changes: 6 additions & 1 deletion Sources/SwiftParser/Markdown/MarkdownLanguage.swift
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ public class MarkdownLanguage: CodeLanguage {
MarkdownListBuilder(),
MarkdownBlockquoteBuilder(),
MarkdownParagraphBuilder(),
MarkdownNewlineBuilder()
MarkdownNewlineBuilder(),
MarkdownEOFBuilder()
]
) {
self.nodes = consumers
Expand Down Expand Up @@ -67,6 +68,10 @@ public class MarkdownLanguage: CodeLanguage {
public func state() -> (any CodeTokenState<MarkdownTokenElement>)? {
nil
}

/// Supplies the Markdown EOF token for the given range.
/// - Parameter range: The range passed through to `MarkdownToken.eof(at:)`.
/// - Returns: The token produced by `MarkdownToken.eof(at:)`.
public func eofToken(at range: Range<String.Index>) -> (any CodeToken<MarkdownTokenElement>)? {
    MarkdownToken.eof(at: range)
}
}

// MARK: - Language Configuration
Expand Down
14 changes: 14 additions & 0 deletions Sources/SwiftParser/Markdown/Nodes/MarkdownEOFBuilder.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import Foundation

/// Node builder that steps over an EOF token and changes nothing else in the context.
public class MarkdownEOFBuilder: CodeNodeBuilder {
    public init() {}

    /// Advances the cursor by one when the current token is a `MarkdownToken`
    /// whose element is `.eof`.
    /// - Parameter context: The construction context; only `consuming` is modified.
    /// - Returns: `true` if an EOF token was consumed, `false` if the cursor is past
    ///   the last token or the current token is not an EOF `MarkdownToken`.
    public func build(
        from context: inout CodeConstructContext<MarkdownNodeElement, MarkdownTokenElement>
    ) -> Bool {
        let position = context.consuming

        // Nothing left to inspect.
        guard position < context.tokens.count else { return false }

        // Only Markdown tokens can carry the `.eof` element checked below.
        guard let candidate = context.tokens[position] as? MarkdownToken else { return false }
        guard candidate.element == .eof else { return false }

        context.consuming += 1
        return true
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ import XCTest
final class MarkdownCodeTokenizerBasicTests: XCTestCase {
func testHeadingTokenization() {
let language = MarkdownLanguage()
let tokenizer = CodeTokenizer(builders: language.tokens, state: language.state)
let tokenizer = CodeTokenizer(
builders: language.tokens,
state: language.state,
eofTokenFactory: { language.eofToken(at: $0) }
)
let (tokens, _) = tokenizer.tokenize("# Title")
XCTAssertEqual(tokens.count, 4)
XCTAssertEqual(tokens[0].element, .hash)
Expand All @@ -15,7 +19,11 @@ final class MarkdownCodeTokenizerBasicTests: XCTestCase {

func testAutolinkTokenization() {
let language = MarkdownLanguage()
let tokenizer = CodeTokenizer(builders: language.tokens, state: language.state)
let tokenizer = CodeTokenizer(
builders: language.tokens,
state: language.state,
eofTokenFactory: { language.eofToken(at: $0) }
)
let (tokens, _) = tokenizer.tokenize("<https://example.com>")
XCTAssertEqual(tokens.count, 2)
XCTAssertEqual(tokens[0].element, .autolink)
Expand All @@ -25,7 +33,11 @@ final class MarkdownCodeTokenizerBasicTests: XCTestCase {

func testBareURLTokenization() {
let language = MarkdownLanguage()
let tokenizer = CodeTokenizer(builders: language.tokens, state: language.state)
let tokenizer = CodeTokenizer(
builders: language.tokens,
state: language.state,
eofTokenFactory: { language.eofToken(at: $0) }
)
let (tokens, _) = tokenizer.tokenize("https://example.com")
XCTAssertEqual(tokens.count, 2)
XCTAssertEqual(tokens[0].element, .url)
Expand All @@ -34,7 +46,11 @@ final class MarkdownCodeTokenizerBasicTests: XCTestCase {

func testBareEmailTokenization() {
let language = MarkdownLanguage()
let tokenizer = CodeTokenizer(builders: language.tokens, state: language.state)
let tokenizer = CodeTokenizer(
builders: language.tokens,
state: language.state,
eofTokenFactory: { language.eofToken(at: $0) }
)
let (tokens, _) = tokenizer.tokenize("user@example.com")
XCTAssertEqual(tokens.count, 2)
XCTAssertEqual(tokens[0].element, .email)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ import XCTest
final class MarkdownCodeTokenizerCodeTests: XCTestCase {
private func tokenize(_ input: String) -> [any CodeToken<MarkdownTokenElement>] {
let language = MarkdownLanguage()
let tokenizer = CodeTokenizer(builders: language.tokens, state: language.state)
let tokenizer = CodeTokenizer(
builders: language.tokens,
state: language.state,
eofTokenFactory: { language.eofToken(at: $0) }
)
let (tokens, _) = tokenizer.tokenize(input)
return tokens
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ import XCTest
final class MarkdownCodeTokenizerCustomContainerTests: XCTestCase {
private func tokenize(_ input: String) -> [any CodeToken<MarkdownTokenElement>] {
let language = MarkdownLanguage()
let tokenizer = CodeTokenizer(builders: language.tokens, state: language.state)
let tokenizer = CodeTokenizer(
builders: language.tokens,
state: language.state,
eofTokenFactory: { language.eofToken(at: $0) }
)
let (tokens, _) = tokenizer.tokenize(input)
return tokens
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ import XCTest
final class MarkdownCodeTokenizerFormulaTests: XCTestCase {
private func tokenize(_ input: String) -> [any CodeToken<MarkdownTokenElement>] {
let language = MarkdownLanguage()
let tokenizer = CodeTokenizer(builders: language.tokens, state: language.state)
let tokenizer = CodeTokenizer(
builders: language.tokens,
state: language.state,
eofTokenFactory: { language.eofToken(at: $0) }
)
let (tokens, _) = tokenizer.tokenize(input)
return tokens
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ import XCTest
final class MarkdownCodeTokenizerHTMLTests: XCTestCase {
private func tokenize(_ input: String) -> [any CodeToken<MarkdownTokenElement>] {
let language = MarkdownLanguage()
let tokenizer = CodeTokenizer(builders: language.tokens, state: language.state)
let tokenizer = CodeTokenizer(
builders: language.tokens,
state: language.state,
eofTokenFactory: { language.eofToken(at: $0) }
)
let (tokens, _) = tokenizer.tokenize(input)
return tokens
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@ final class MarkdownTokenizerBasicTests: XCTestCase {
override func setUp() {
super.setUp()
let language = MarkdownLanguage()
tokenizer = CodeTokenizer(builders: language.tokens, state: language.state)
tokenizer = CodeTokenizer(
builders: language.tokens,
state: language.state,
eofTokenFactory: { language.eofToken(at: $0) }
)
}

override func tearDown() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@ final class MarkdownTokenizerComplexTests: XCTestCase {
override func setUp() {
super.setUp()
let language = MarkdownLanguage()
tokenizer = CodeTokenizer(builders: language.tokens, state: language.state)
tokenizer = CodeTokenizer(
builders: language.tokens,
state: language.state,
eofTokenFactory: { language.eofToken(at: $0) }
)
}

override func tearDown() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@ final class MarkdownTokenizerFormulaTests: XCTestCase {
override func setUp() {
super.setUp()
let language = MarkdownLanguage()
tokenizer = CodeTokenizer(builders: language.tokens, state: language.state)
tokenizer = CodeTokenizer(
builders: language.tokens,
state: language.state,
eofTokenFactory: { language.eofToken(at: $0) }
)
}

override func tearDown() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@ final class MarkdownTokenizerHTMLTests: XCTestCase {
override func setUp() {
super.setUp()
let language = MarkdownLanguage()
tokenizer = CodeTokenizer(builders: language.tokens, state: language.state)
tokenizer = CodeTokenizer(
builders: language.tokens,
state: language.state,
eofTokenFactory: { language.eofToken(at: $0) }
)
}

override func tearDown() {
Expand Down