Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 169 additions & 0 deletions Sources/SwiftParser/CodeParser.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
import Foundation

/// Drives a token stream through a list of registered builders and supports
/// incremental re-parsing via per-token-index snapshots.
///
/// For every token the parser first offers it to the element builders (in
/// registration order) and then to the expression builders. The first builder
/// that accepts the token handles it. A token nobody accepts is recorded as an
/// error and skipped.
public final class CodeParser {
    private var builders: [CodeElementBuilder]
    private let tokenizer: CodeTokenizer
    private var expressionBuilders: [CodeExpressionBuilder]

    // State for incremental parsing
    private var lastContext: CodeContext?
    private var snapshots: [Int: CodeContext.Snapshot] = [:]
    private var lastTokens: [any CodeToken] = []

    /// Creates a parser.
    /// - Parameters:
    ///   - tokenizer: Produces the token stream for every `parse`/`update` call.
    ///   - builders: Element builders, consulted in order before any expression builder.
    ///   - expressionBuilders: Expression builders, consulted in order when no element builder accepts.
    public init(tokenizer: CodeTokenizer, builders: [CodeElementBuilder] = [], expressionBuilders: [CodeExpressionBuilder] = []) {
        self.tokenizer = tokenizer
        self.builders = builders
        self.expressionBuilders = expressionBuilders
    }

    // MARK: - Builder registration

    /// Appends `builder` to the end of the element-builder list.
    public func register(builder: CodeElementBuilder) {
        builders.append(builder)
    }

    /// Removes the first registered element builder that is the same object
    /// instance (`===`) as `builder`. Does nothing when no instance matches.
    public func unregister(builder: CodeElementBuilder) {
        guard let target = builder as? AnyObject,
              let index = builders.firstIndex(where: { ($0 as? AnyObject) === target }) else {
            return
        }
        builders.remove(at: index)
    }

    /// Removes every registered element builder.
    public func clearBuilders() {
        builders.removeAll()
    }

    /// Appends `expressionBuilder` to the end of the expression-builder list.
    public func register(expressionBuilder: CodeExpressionBuilder) {
        expressionBuilders.append(expressionBuilder)
    }

    /// Removes the first registered expression builder that is the same object
    /// instance (`===`) as `expressionBuilder`. Does nothing when no instance matches.
    public func unregister(expressionBuilder: CodeExpressionBuilder) {
        guard let target = expressionBuilder as? AnyObject,
              let index = expressionBuilders.firstIndex(where: { ($0 as? AnyObject) === target }) else {
            return
        }
        expressionBuilders.remove(at: index)
    }

    /// Removes every registered expression builder.
    public func clearExpressionBuilders() {
        expressionBuilders.removeAll()
    }

    // MARK: - Parsing

    /// Tokenizes `input` and parses it from scratch into `rootNode`.
    ///
    /// Discards all incremental state from earlier calls and records fresh
    /// snapshots so that a later `update` can resume part-way through.
    /// - Returns: `rootNode` together with the final parsing context.
    public func parse(_ input: String, rootNode: CodeNode) -> (node: CodeNode, context: CodeContext) {
        let tokens = tokenizer.tokenize(input)
        var context = CodeContext(tokens: tokens, index: 0, currentNode: rootNode, errors: [], input: input)

        snapshots = [:]
        lastTokens = tokens

        consumeTokens(&context)
        return (rootNode, context)
    }

    /// Re-parses `input` incrementally, reusing the state of the previous
    /// `parse`/`update` call up to the first token that changed.
    ///
    /// Falls back to a full `parse` when there is no previous state. When
    /// previous state exists, parsing resumes on the node stored in the restored
    /// snapshot; `rootNode` is only used as the returned node.
    /// - Returns: `rootNode` together with the final parsing context.
    public func update(_ input: String, rootNode: CodeNode) -> (node: CodeNode, context: CodeContext) {
        guard var previous = lastContext else {
            return parse(input, rootNode: rootNode)
        }

        let newTokens = tokenizer.tokenize(input)

        // Length of the token prefix shared by the old and the new stream.
        let sharedLimit = min(lastTokens.count, newTokens.count)
        var diffIndex = 0
        while diffIndex < sharedLimit && tokenEqual(lastTokens[diffIndex], newTokens[diffIndex]) {
            diffIndex += 1
        }

        // Walk back to the closest token index at or before the change that has a snapshot.
        var restoreIndex = diffIndex
        while restoreIndex >= 0 && snapshots[restoreIndex] == nil {
            restoreIndex -= 1
        }
        guard restoreIndex >= 0, let snap = snapshots[restoreIndex] else {
            // Defensive: without a snapshot there is nothing to resume from, and
            // continuing would index the token array at -1.
            return parse(input, rootNode: rootNode)
        }
        previous.restore(snap)

        // `CodeContext.input` is immutable, so build a new context instead of
        // mutating the old one; otherwise the context would keep the previous
        // input string while holding tokens produced from the new one.
        var context = CodeContext(
            tokens: newTokens,
            index: restoreIndex,
            currentNode: previous.currentNode,
            errors: previous.errors,
            input: input
        )

        // Snapshots past the restore point describe state that no longer exists.
        snapshots = snapshots.filter { $0.key <= restoreIndex }
        lastTokens = newTokens

        consumeTokens(&context)
        return (rootNode, context)
    }

    /// Shared main loop of `parse` and `update`: processes tokens from
    /// `context.index` until the end of the stream or a token whose
    /// `kindDescription` is `"eof"`, snapshotting before each token, then stores
    /// the final snapshot and remembers `context` for the next `update`.
    private func consumeTokens(_ context: inout CodeContext) {
        while context.index < context.tokens.count {
            snapshots[context.index] = context.snapshot()
            let token = context.tokens[context.index]
            if token.kindDescription == "eof" {
                break
            }
            if !dispatch(token, in: &context) {
                context.errors.append(CodeError("Unrecognized token \(token.kindDescription)", range: token.range))
                context.index += 1
            }
        }
        snapshots[context.index] = context.snapshot()
        lastContext = context
    }

    /// Offers `token` to the element builders and then to the expression
    /// builders; the first one that accepts handles it.
    /// - Returns: `true` when some builder accepted the token.
    private func dispatch(_ token: any CodeToken, in context: inout CodeContext) -> Bool {
        for builder in builders where builder.accept(context: context, token: token) {
            builder.build(context: &context)
            return true
        }
        for expr in expressionBuilders where expr.accept(context: context, token: token) {
            // An accepting expression builder counts as a match even when it yields no node.
            if let node = expr.parse(context: &context) {
                context.currentNode.addChild(node)
            }
            return true
        }
        return false
    }

    /// Two tokens are considered equal when both kind and text match; ranges are ignored.
    private func tokenEqual(_ a: any CodeToken, _ b: any CodeToken) -> Bool {
        return a.kindDescription == b.kindDescription && a.text == b.text
    }

    /// Parses one expression starting at the current token using the first
    /// expression builder that accepts it.
    /// - Parameters:
    ///   - context: Parsing state; advanced by the chosen builder.
    ///   - minBP: Minimum binding power forwarded to the builder's `parse`.
    /// - Returns: The parsed node, or `nil` when the stream is exhausted, no
    ///   builder accepts the token, or the chosen builder produces nothing.
    public func parseExpression(context: inout CodeContext, minBP: Int = 0) -> CodeNode? {
        guard context.index < context.tokens.count else { return nil }
        let token = context.tokens[context.index]
        for expr in expressionBuilders where expr.accept(context: context, token: token) {
            return expr.parse(context: &context, minBP: minBP)
        }
        return nil
    }
}
104 changes: 104 additions & 0 deletions Sources/SwiftParser/Core.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import Foundation

/// Marker protocol with no requirements.
/// `CodeNode.type` and `CodeLanguage.rootElement` are typed as `any CodeElement`.
public protocol CodeElement {}

/// A single token produced by a `CodeTokenizer`.
public protocol CodeToken {
/// Textual name of the token's kind. `CodeParser` stops at a token whose
/// kind description is `"eof"` and embeds this string in its
/// "Unrecognized token" error messages.
var kindDescription: String { get }
/// The token's text. Together with `kindDescription` it decides whether
/// `CodeParser` treats two tokens as equal during an incremental update.
var text: String { get }
/// Location of the token as a string-index range; `CodeParser` attaches it
/// to the errors it reports for the token.
/// NOTE(review): presumably indexes into the tokenized input — confirm.
var range: Range<String.Index> { get }
}

/// Turns source text into the token sequence consumed by `CodeParser`.
public protocol CodeTokenizer {
/// Returns the tokens for `input`.
/// `CodeParser` stops at the first token whose `kindDescription` is `"eof"`,
/// or at the end of the returned array, whichever comes first.
func tokenize(_ input: String) -> [any CodeToken]
}

/// A parsing rule that `CodeParser` consults for the token at the current index.
public protocol CodeElementBuilder {
/// Returns `true` when this builder wants to handle `token` in the given
/// context. `CodeParser` uses the first registered builder that accepts.
func accept(context: CodeContext, token: any CodeToken) -> Bool
/// Handles the current token by mutating `context`. `CodeParser` calls this
/// only after `accept` returned `true` and does not advance `context.index`
/// itself afterwards — presumably implementations must consume at least one
/// token; confirm against concrete builders.
func build(context: inout CodeContext)
}

/// A node of the parse tree: an element type, a string value, an optional
/// source range, and an ordered list of child nodes.
public final class CodeNode {
    /// The kind of element this node represents.
    public let type: any CodeElement
    /// Free-form payload of the node.
    public var value: String
    /// The node this one was last added to via `addChild`; held weakly.
    public weak var parent: CodeNode?
    /// Child nodes in insertion order.
    public var children: [CodeNode] = []
    /// Optional location of the node as a string-index range.
    public var range: Range<String.Index>?

    /// Structural hash of the subtree rooted at this node.
    ///
    /// Combines the textual description of `type`, `value`, and the `id` of
    /// every child in order, so it is recomputed recursively on each access.
    /// `range` and `parent` do not contribute.
    /// Note: `Hasher` values are not guaranteed to be equal across different
    /// executions of a program, so this value should not be persisted.
    public var id: Int {
        var digest = Hasher()
        digest.combine(String(describing: type))
        digest.combine(value)
        children.forEach { child in
            digest.combine(child.id)
        }
        return digest.finalize()
    }

    /// Creates a node without parent or children.
    public init(type: any CodeElement, value: String, range: Range<String.Index>? = nil) {
        self.range = range
        self.value = value
        self.type = type
    }

    /// Appends `node` to `children` and points its `parent` at this node.
    /// The node is not detached from any previous parent's `children`.
    public func addChild(_ node: CodeNode) {
        children.append(node)
        node.parent = self
    }
}

/// An error recorded while parsing, with an optional location.
public struct CodeError: Error {
    /// Human-readable description of the problem.
    public let message: String
    /// Where the problem occurred as a string-index range, when known.
    public let range: Range<String.Index>?

    /// Creates an error.
    /// - Parameters:
    ///   - message: Description of the problem.
    ///   - range: Optional location; defaults to `nil`.
    public init(_ message: String, range: Range<String.Index>? = nil) {
        self.range = range
        self.message = message
    }
}

/// Mutable parsing state handed to builders: the token stream, the cursor
/// into it, the node currently being populated, and the errors collected so far.
public struct CodeContext {
    /// The token stream being parsed.
    public var tokens: [any CodeToken]
    /// Position of the next token to process in `tokens`.
    public var index: Int
    /// The node that newly built children are attached to.
    public var currentNode: CodeNode
    /// Errors collected so far, in the order they were recorded.
    public var errors: [CodeError]
    /// The source text this context was created for.
    public let input: String

    /// Creates a context from its five stored components.
    public init(tokens: [any CodeToken], index: Int, currentNode: CodeNode, errors: [CodeError], input: String) {
        self.input = input
        self.errors = errors
        self.currentNode = currentNode
        self.index = index
        self.tokens = tokens
    }

    /// Snapshot represents a parser state that can be restored later.
    ///
    /// It stores the cursor, the current node, and how many children and errors
    /// existed at capture time — not copies of the children or errors themselves.
    public struct Snapshot {
        fileprivate let index: Int
        fileprivate let node: CodeNode
        fileprivate let childCount: Int
        fileprivate let errorCount: Int
    }

    /// Capture the current parser state so it can be restored on demand.
    public func snapshot() -> Snapshot {
        let node = currentNode
        return Snapshot(
            index: index,
            node: node,
            childCount: node.children.count,
            errorCount: errors.count
        )
    }

    /// Restore the parser to a previously captured state, discarding any new nodes or errors.
    ///
    /// Only trailing entries are dropped: children of the snapshot's node beyond
    /// the recorded count, and errors beyond the recorded count.
    public mutating func restore(_ snapshot: Snapshot) {
        index = snapshot.index
        currentNode = snapshot.node

        let surplusChildren = currentNode.children.count - snapshot.childCount
        if surplusChildren > 0 {
            currentNode.children.removeLast(surplusChildren)
        }

        let surplusErrors = errors.count - snapshot.errorCount
        if surplusErrors > 0 {
            errors.removeLast(surplusErrors)
        }
    }
}

/// Bundles the pieces that describe one language: a tokenizer, the element and
/// expression builders, and the element type of the root.
/// `tokenizer`, `builders` and `expressionBuilders` match the parameters of
/// `CodeParser.init`.
public protocol CodeLanguage {
/// Tokenizer for this language.
var tokenizer: CodeTokenizer { get }
/// Element builders for this language.
var builders: [CodeElementBuilder] { get }
/// Element type of the root.
/// NOTE(review): presumably used as the `type` of the root `CodeNode` — confirm against callers.
var rootElement: any CodeElement { get }
/// Expression builders for this language.
var expressionBuilders: [CodeExpressionBuilder] { get }
}
36 changes: 36 additions & 0 deletions Sources/SwiftParser/ExpressionBuilder.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import Foundation

/// An element builder for operator expressions. Conformers supply the four
/// hooks below; the protocol extension provides `accept`, `build` and the
/// binding-power-driven `parse(context:minBP:)` on top of them.
public protocol CodeExpressionBuilder: CodeElementBuilder {
/// Whether `token` can start an expression. The default `accept` returns this
/// value, and the default `parse` returns `nil` when it is `false`.
func isPrefix(token: any CodeToken) -> Bool
/// Builds the leading operand for `token`. The default `parse` calls this
/// after it has already advanced `context.index` past `token`; returning
/// `nil` makes `parse` return `nil`.
func prefix(context: inout CodeContext, token: any CodeToken) -> CodeNode?
/// Binding powers of `token` when used as an infix operator, or `nil` when it
/// is not one. The default `parse` stops its operator loop on `nil` or when
/// `left` is below the current minimum, and passes `right` as the minimum for
/// the right-hand operand.
func infixBindingPower(of token: any CodeToken) -> (left: Int, right: Int)?
/// Combines `left` and `right` for the operator `token` into one node. The
/// default `parse` calls this after the right-hand operand has been parsed.
func infix(context: inout CodeContext, left: CodeNode, token: any CodeToken, right: CodeNode) -> CodeNode
}

public extension CodeExpressionBuilder {
    /// An expression builder accepts exactly the tokens that can start an expression.
    func accept(context: CodeContext, token: any CodeToken) -> Bool {
        isPrefix(token: token)
    }

    /// Parses one expression and, when it yields a node, attaches that node to
    /// the context's current node.
    func build(context: inout CodeContext) {
        guard let node = parse(context: &context) else { return }
        context.currentNode.addChild(node)
    }

    /// Parses an expression starting at the current token using binding powers.
    ///
    /// - Parameters:
    ///   - context: Parsing state; its `index` is advanced past consumed tokens.
    ///   - minBP: Infix operators whose left binding power is below this value
    ///     are left for the caller.
    /// - Returns: The expression node, or `nil` when no token is left, the
    ///   current token is not a prefix token, or `prefix` yields nothing. In
    ///   the last case the prefix token has already been consumed.
    func parse(context: inout CodeContext, minBP: Int = 0) -> CodeNode? {
        if context.index >= context.tokens.count { return nil }

        let head = context.tokens[context.index]
        if !isPrefix(token: head) { return nil }

        context.index += 1
        guard var accumulated = prefix(context: &context, token: head) else { return nil }

        while context.index < context.tokens.count {
            let candidate = context.tokens[context.index]
            guard let power = infixBindingPower(of: candidate) else { break }
            if power.left < minBP { break }

            context.index += 1

            // A missing right-hand operand is replaced by an empty node of the
            // left operand's type; no error is recorded here.
            let operand: CodeNode
            if let parsed = parse(context: &context, minBP: power.right) {
                operand = parsed
            } else {
                operand = CodeNode(type: accumulated.type, value: "")
            }
            accumulated = infix(context: &context, left: accumulated, token: candidate, right: operand)
        }
        return accumulated
    }
}
Loading