Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 17 additions & 156 deletions Sources/SwiftParser/CodeParser.swift
Original file line number Diff line number Diff line change
@@ -1,189 +1,50 @@
import Foundation

public final class CodeParser {
private var builders: [CodeElementBuilder]
private var consumers: [CodeTokenConsumer]
private let tokenizer: CodeTokenizer
private var expressionBuilders: [CodeExpressionBuilder]

// State for incremental parsing
private var lastContext: CodeContext?
private var snapshots: [Int: CodeContext.Snapshot] = [:]
private var lastTokens: [any CodeToken] = []
// Registered state is now reset for each parse run

// NOTE(review): two initializer headers appear back to back here and only one
// closing brace follows, so this span looks like the removed and the added side
// of a diff interleaved rather than compilable Swift - confirm against the real file.
/// Creates a parser from an explicit tokenizer plus optional element and expression builders.
public init(tokenizer: CodeTokenizer, builders: [CodeElementBuilder] = [], expressionBuilders: [CodeExpressionBuilder] = []) {
self.tokenizer = tokenizer
self.builders = builders
self.expressionBuilders = expressionBuilders
/// Creates a parser that takes its tokenizer and token consumers from `language`.
public init(language: CodeLanguage) {
self.tokenizer = language.tokenizer
self.consumers = language.consumers
}

/// Adds `builder` to the end of the element-builder list.
///
/// The parsing loops in this file try builders in list order and stop at the
/// first one that accepts a token, so earlier registrations take precedence.
public func register(builder: CodeElementBuilder) {
    builders += [builder]
}

/// Removes the first registered element builder that is the same object
/// instance (`===`) as `builder`. Does nothing when no entry matches.
public func unregister(builder: CodeElementBuilder) {
    guard let target = builder as? AnyObject,
          let position = builders.firstIndex(where: { ($0 as? AnyObject) === target })
    else {
        return
    }
    builders.remove(at: position)
}

/// Drops every registered element builder.
public func clearBuilders() {
    builders = []
}

/// Adds `expressionBuilder` to the end of the expression-builder list.
///
/// Expression builders are consulted in list order, and the first one that
/// accepts the current token is used.
public func register(expressionBuilder: CodeExpressionBuilder) {
    expressionBuilders += [expressionBuilder]
}

/// Removes the first registered expression builder that is the same object
/// instance (`===`) as `expressionBuilder`. Does nothing when no entry matches.
public func unregister(expressionBuilder: CodeExpressionBuilder) {
    guard let target = expressionBuilder as? AnyObject,
          let position = expressionBuilders.firstIndex(where: { ($0 as? AnyObject) === target })
    else {
        return
    }
    expressionBuilders.remove(at: position)
}

/// Drops every registered expression builder.
public func clearExpressionBuilders() {
    expressionBuilders = []
}

/// Tokenizes `input` and runs the registered handlers over the tokens, attaching
/// results under `rootNode`.
///
/// - Parameters:
///   - input: Source text handed to the tokenizer.
///   - rootNode: Node used as the context's initial `currentNode`; it is also the node returned.
/// - Returns: `rootNode` together with the final parsing context (including collected errors).
///
/// NOTE(review): this span appears to interleave two versions of the loop from a
/// diff - an index-based one (`context.index`, `lastIndex`, `builders`,
/// `expressionBuilders`, `snapshots`) and a token-consuming one
/// (`context.tokens.first`, `lastCount`, `consumers`). `context` is declared twice
/// and the braces do not balance, so treat the comments below as describing the
/// two strategies side by side, not a single executable flow.
public func parse(_ input: String, rootNode: CodeNode) -> (node: CodeNode, context: CodeContext) {
let tokens = tokenizer.tokenize(input)
var context = CodeContext(tokens: tokens, index: 0, currentNode: rootNode, errors: [], input: input, linkReferences: [:])
var context = CodeContext(tokens: tokens, currentNode: rootNode, errors: [])

// Start from a clean incremental state for this run.
snapshots = [:]
lastTokens = tokens
// Infinite loop protection: track token count progression
// Seeded one above the real count so the first iteration never trips the check.
var lastCount = context.tokens.count + 1

// Infinite loop protection: track index progression
var lastIndex = -1

while context.index < context.tokens.count {
// Infinite loop detection - if index hasn't advanced, terminate parsing immediately
if context.index == lastIndex {
context.errors.append(CodeError("Infinite loop detected: parser stuck at token index \(context.index). Terminating parse to prevent hang.", range: context.tokens[context.index].range))
while let token = context.tokens.first {
// Infinite loop detection - if token count hasn't decreased, terminate parsing immediately
if context.tokens.count == lastCount {
context.errors.append(CodeError("Infinite loop detected: parser stuck at token \(token.kindDescription). Terminating parse to prevent hang.", range: token.range))
break
}
lastIndex = context.index

// Record a restore point keyed by the token index about to be processed.
snapshots[context.index] = context.snapshot()
let token = context.tokens[context.index]
lastCount = context.tokens.count

// An "eof" token ends parsing without being dispatched to any handler.
if token.kindDescription == "eof" {
break
}
var matched = false
for builder in builders {
if builder.accept(context: context, token: token) {
builder.build(context: &context)
// First consumer that reports the token as handled wins; the rest are not tried.
for consumer in consumers {
if consumer.consume(context: &context, token: token) {
matched = true
break
}
}
// Fall back to expression builders; a parsed node is attached to the current node.
if !matched {
for expr in expressionBuilders {
if expr.accept(context: context, token: token) {
if let node = expr.parse(context: &context) {
context.currentNode.addChild(node)
}
matched = true
break
}
}
}
// Nothing handled the token: record an error and step past it.
if !matched {
context.errors.append(CodeError("Unrecognized token \(token.kindDescription)", range: token.range))
context.index += 1
}
}
// Keep a snapshot and the final context so a later `update` can resume from them.
snapshots[context.index] = context.snapshot()
lastContext = context
return (rootNode, context)
}

/// Re-parses `input` incrementally, reusing state saved by the previous run.
///
/// Falls back to a full `parse` when no previous context exists. Otherwise the new
/// token stream is compared with the previous one, the context is rolled back to
/// the nearest snapshot at or before the first differing token, and parsing
/// continues from there.
///
/// - Parameters:
///   - input: The updated source text.
///   - rootNode: The node returned to the caller. Note that the restored context
///     keeps the node stored in its snapshot; `rootNode` is not assigned into it here.
/// - Returns: `rootNode` together with the resulting parsing context.
public func update(_ input: String, rootNode: CodeNode) -> (node: CodeNode, context: CodeContext) {
guard var context = lastContext else {
return parse(input, rootNode: rootNode)
}

let newTokens = tokenizer.tokenize(input)

// Length of the common prefix of the old and new token streams (per `tokenEqual`).
var diffIndex = 0
while diffIndex < min(lastTokens.count, newTokens.count) {
if !tokenEqual(lastTokens[diffIndex], newTokens[diffIndex]) {
break
}
diffIndex += 1
}

// Walk backwards to the closest index that has a stored snapshot.
// NOTE(review): if no snapshot exists at any index >= 0 this ends at -1 and
// `context.index` below becomes -1 - presumably index 0 always has a snapshot
// after a prior run; confirm.
var restoreIndex = diffIndex
while restoreIndex >= 0 && snapshots[restoreIndex] == nil {
restoreIndex -= 1
}
if let snap = snapshots[restoreIndex] {
context.restore(snap)
}

context.tokens = newTokens
context.index = restoreIndex

// Snapshots past the restore point describe tokens that may have changed; drop them.
snapshots = snapshots.filter { $0.key <= restoreIndex }
lastTokens = newTokens

// Infinite loop protection for update method
var lastIndex = -1

while context.index < context.tokens.count {
// Infinite loop detection - if index hasn't advanced, terminate parsing immediately
if context.index == lastIndex {
context.errors.append(CodeError("Infinite loop detected in update: parser stuck at token index \(context.index). Terminating parse to prevent hang.", range: context.tokens[context.index].range))
break
}
lastIndex = context.index

snapshots[context.index] = context.snapshot()
let token = context.tokens[context.index]
// An "eof" token ends parsing without being dispatched to any handler.
if token.kindDescription == "eof" { break }
var matched = false
// Element builders get the first chance; the first one that accepts the token builds.
for builder in builders {
if builder.accept(context: context, token: token) {
builder.build(context: &context)
matched = true
break
}
}
// Otherwise try expression builders; a parsed node is attached to the current node.
if !matched {
for expr in expressionBuilders {
if expr.accept(context: context, token: token) {
if let node = expr.parse(context: &context) {
context.currentNode.addChild(node)
}
matched = true
break
}
}
}
// Nothing handled the token: record an error and step past it.
// NOTE(review): both `context.index += 1` and `context.tokens.removeFirst()` run
// here. Advancing the index and also dropping the head of `tokens` would skip a
// token and shift the indices the snapshots are keyed by; `removeFirst()` looks
// like a line from the token-consuming version of the loop - confirm which one
// is intended.
if !matched {
context.errors.append(CodeError("Unrecognized token \(token.kindDescription)", range: token.range))
context.index += 1
context.tokens.removeFirst()
}
}
// Keep a snapshot and the final context for the next incremental update.
snapshots[context.index] = context.snapshot()
lastContext = context
return (rootNode, context)
}

/// Reports whether two tokens have the same `kindDescription` and the same `text`.
/// No other token properties are compared.
private func tokenEqual(_ a: any CodeToken, _ b: any CodeToken) -> Bool {
    guard a.kindDescription == b.kindDescription else { return false }
    return a.text == b.text
}

/// Parses an expression starting at the context's current token.
///
/// The first registered expression builder that accepts the current token is
/// asked to parse, and its result is returned as-is.
///
/// - Parameters:
///   - context: Parsing context, passed on to the selected builder for mutation.
///   - minBP: Forwarded unchanged to the builder's `parse(context:minBP:)`.
/// - Returns: The builder's result, or `nil` when the index is past the last
///   token or no builder accepts the current token.
public func parseExpression(context: inout CodeContext, minBP: Int = 0) -> CodeNode? {
    if context.index >= context.tokens.count {
        return nil
    }
    let current = context.tokens[context.index]
    for candidate in expressionBuilders where candidate.accept(context: context, token: current) {
        return candidate.parse(context: &context, minBP: minBP)
    }
    return nil
}
}
45 changes: 6 additions & 39 deletions Sources/SwiftParser/Core.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@ public protocol CodeTokenizer {
func tokenize(_ input: String) -> [any CodeToken]
}

public protocol CodeElementBuilder {
func accept(context: CodeContext, token: any CodeToken) -> Bool
func build(context: inout CodeContext)
/// A handler that the parser offers tokens to, one token at a time.
public protocol CodeTokenConsumer {
/// Consumes a token and optionally updates the AST if it is recognized.
///
/// - Parameters:
///   - context: The parsing context; passed `inout` so the consumer can modify it.
///   - token: The token being offered to this consumer.
/// - Returns: `true` if the token was handled and the context advanced. The parser
///   loop in `CodeParser.parse` stops trying further consumers once one returns `true`.
func consume(context: inout CodeContext, token: any CodeToken) -> Bool
}

public class CodeNode {
Expand Down Expand Up @@ -138,52 +139,18 @@ public struct CodeError: Error {

public struct CodeContext {
public var tokens: [any CodeToken]
public var index: Int
public var currentNode: CodeNode
public var errors: [CodeError]
public let input: String
public var linkReferences: [String: String]

// NOTE(review): two initializer signatures appear back to back here with a single
// body and closing brace, so this span looks like the removed and the added side of
// a diff interleaved - the shorter signature does not declare `index`, `input` or
// `linkReferences`, which the body assigns. Confirm against the real file.
/// Creates a context by storing each argument in the property of the same name.
public init(tokens: [any CodeToken], index: Int, currentNode: CodeNode, errors: [CodeError], input: String, linkReferences: [String: String] = [:]) {
public init(tokens: [any CodeToken], currentNode: CodeNode, errors: [CodeError]) {
self.tokens = tokens
self.index = index
self.currentNode = currentNode
self.errors = errors
self.input = input
self.linkReferences = linkReferences
}

/// Snapshot represents a parser state that can be restored later.
///
/// It stores counts rather than copies of the child and error lists; `restore(_:)`
/// uses those counts to trim anything added after the snapshot was taken.
public struct Snapshot {
/// Value of the context's `index` when the snapshot was taken.
fileprivate let index: Int
/// The context's `currentNode` when the snapshot was taken.
fileprivate let node: CodeNode
/// Number of children `node` had when the snapshot was taken.
fileprivate let childCount: Int
/// Number of errors the context held when the snapshot was taken.
fileprivate let errorCount: Int
/// Copy of the context's `linkReferences` when the snapshot was taken.
fileprivate let linkReferences: [String: String]
}

/// Records the current index, current node, that node's child count, the number
/// of errors and the link references so the state can be restored later.
public func snapshot() -> Snapshot {
    let node = currentNode
    return Snapshot(
        index: index,
        node: node,
        childCount: node.children.count,
        errorCount: errors.count,
        linkReferences: linkReferences
    )
}

/// Rolls the context back to `snapshot`.
///
/// The index, current node and link references are reset to the captured values.
/// Children appended to the captured node and errors recorded since the snapshot
/// are removed from the end of their lists.
public mutating func restore(_ snapshot: Snapshot) {
    index = snapshot.index
    currentNode = snapshot.node

    // Trim only what was added after the snapshot; never grow the lists.
    let surplusChildren = currentNode.children.count - snapshot.childCount
    if surplusChildren > 0 {
        currentNode.children.removeLast(surplusChildren)
    }

    let surplusErrors = errors.count - snapshot.errorCount
    if surplusErrors > 0 {
        errors.removeLast(surplusErrors)
    }

    linkReferences = snapshot.linkReferences
}
}

/// Describes a language to the parser: how to tokenize it and which handlers process its tokens.
///
/// NOTE(review): this listing shows both `builders`/`expressionBuilders` and
/// `consumers`; `CodeParser.init(language:)` reads only `tokenizer` and `consumers`,
/// so some requirements here may be the removed side of a diff - confirm.
public protocol CodeLanguage {
/// Tokenizer used to split the input into tokens.
var tokenizer: CodeTokenizer { get }
/// Element builders provided by the language.
var builders: [CodeElementBuilder] { get }
/// Token consumers provided by the language.
var consumers: [CodeTokenConsumer] { get }
/// Element describing the root of a parsed document - presumably used when creating the root node; verify against callers.
var rootElement: any CodeElement { get }
/// Expression builders provided by the language.
var expressionBuilders: [CodeExpressionBuilder] { get }
}
36 changes: 0 additions & 36 deletions Sources/SwiftParser/ExpressionBuilder.swift

This file was deleted.

This file was deleted.

This file was deleted.

Loading