Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Sources/SwiftParser/Markdown/MarkdownLanguage.swift
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public class MarkdownLanguage: CodeLanguage {
) {
self.outdatedTokenizer = outdatedTokenizer
self.nodes = consumers
self.tokens = []
self.tokens = MarkdownTokenBuilders.commonMarkBase()
}

// MARK: - Language Protocol Implementation
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import Foundation

/// Recognizes backtick-delimited code: an opening run of backticks, arbitrary
/// content, and a matching closing run of backticks.
struct BacktickTokenBuilder: CodeTokenBuilder {
    typealias Element = MarkdownTokenElement

    /// Tries to consume a backtick-delimited span starting at `context.consuming`.
    ///
    /// An opening run of one or two backticks yields an `inlineCode` token and is
    /// closed only by a run of exactly the same length. An opening run of three or
    /// more backticks yields a `fencedCodeBlock` token and is closed by a run of at
    /// least the same length, which is consumed in full. The token's text and range
    /// cover both delimiters and everything between them.
    ///
    /// - Parameter context: Tokenizer state. On success `consuming` is moved past
    ///   the closing run and one token is appended to `tokens`.
    /// - Returns: `true` when a token was emitted; `false` when the current
    ///   character is not a backtick or no closing run exists. In the `false` case
    ///   the context is left untouched.
    func build(from context: CodeTokenContext<MarkdownTokenElement>) -> Bool {
        let source = context.source
        let start = context.consuming
        guard start < source.endIndex, source[start] == "`" else { return false }

        // Measure the opening run of backticks.
        var contentStart = start
        var tickCount = 0
        while contentStart < source.endIndex && source[contentStart] == "`" {
            tickCount += 1
            contentStart = source.index(after: contentStart)
        }
        let isFence = tickCount >= 3

        // Scan forward for a closing run.
        var end = contentStart
        var foundClosing = false
        while end < source.endIndex {
            guard source[end] == "`" else {
                end = source.index(after: end)
                continue
            }

            // Measure the *entire* candidate run. Stopping the count early would let
            // a longer run (e.g. "``" after a single-backtick opener) be mistaken for
            // the closer and would leave stray backticks behind.
            var runEnd = end
            var runLength = 0
            while runEnd < source.endIndex && source[runEnd] == "`" {
                runLength += 1
                runEnd = source.index(after: runEnd)
            }

            let closes = isFence ? runLength >= tickCount : runLength == tickCount
            end = runEnd
            if closes {
                foundClosing = true
                break
            }
        }

        // No closing delimiter: decline without consuming anything.
        guard foundClosing else { return false }

        context.consuming = end
        let range = start..<end
        let text = String(source[range])
        if isFence {
            context.tokens.append(MarkdownToken.fencedCodeBlock(text, at: range))
        } else {
            context.tokens.append(MarkdownToken.inlineCode(text, at: range))
        }
        return true
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import Foundation

/// Namespace for ready-made lists of Markdown token builders.
enum MarkdownTokenBuilders {
    /// Assembles the base builder list.
    ///
    /// The returned order is: the backtick builder, the four whitespace builders,
    /// one builder per single punctuation character, the number builder, and
    /// finally the text builder.
    static func commonMarkBase() -> [any CodeTokenBuilder<MarkdownTokenElement>] {
        typealias Builder = any CodeTokenBuilder<MarkdownTokenElement>

        let whitespace: [(Character, MarkdownTokenElement)] = [
            (" ", .space),
            ("\t", .tab),
            ("\n", .newline),
            ("\r", .carriageReturn)
        ]

        let punctuation: [(Character, MarkdownTokenElement)] = [
            ("#", .hash), ("*", .asterisk), ("_", .underscore), ("-", .dash),
            ("+", .plus), ("=", .equals), ("~", .tilde), ("^", .caret),
            ("|", .pipe), (":", .colon), (";", .semicolon), ("!", .exclamation),
            ("?", .question), (".", .dot), (",", .comma), (">", .gt), ("<", .lt),
            ("&", .ampersand), ("\\", .backslash), ("/", .forwardSlash),
            ("\"", .quote), ("'", .singleQuote), ("[", .leftBracket), ("]", .rightBracket),
            ("(", .leftParen), (")", .rightParen), ("{", .leftBrace), ("}", .rightBrace)
        ]

        // Special structures come first.
        let leading: [Builder] = [BacktickTokenBuilder()]

        let whitespaceBuilders = whitespace.map { entry -> Builder in
            WhitespaceTokenBuilder(character: entry.0, element: entry.1)
        }
        let punctuationBuilders = punctuation.map { entry -> Builder in
            SingleCharacterTokenBuilder(character: entry.0, element: entry.1)
        }

        // Numbers and free text are placed last.
        let trailing: [Builder] = [NumberTokenBuilder(), TextTokenBuilder()]

        return leading + whitespaceBuilders + punctuationBuilders + trailing
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import Foundation

/// Emits a `number` token for a run of characters whose `isNumber` is true.
struct NumberTokenBuilder: CodeTokenBuilder {
    typealias Element = MarkdownTokenElement

    /// Consumes the longest run of `isNumber` characters at `context.consuming`.
    ///
    /// - Parameter context: Tokenizer state; on success `consuming` is advanced to
    ///   the end of the run and a number token is appended to `tokens`.
    /// - Returns: `true` if at least one character was consumed, otherwise `false`
    ///   with the context unchanged.
    func build(from context: CodeTokenContext<MarkdownTokenElement>) -> Bool {
        let start = context.consuming
        guard start < context.source.endIndex else { return false }

        // The slice shares indices with the source, so its end is the stop position.
        let run = context.source[start...].prefix(while: { $0.isNumber })
        guard !run.isEmpty else { return false }

        let stop = run.endIndex
        let range = start..<stop
        let text = String(context.source[range])
        context.consuming = stop
        context.tokens.append(MarkdownToken.number(text, at: range))
        return true
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import Foundation

/// Emits a one-character token when the current character equals `character`.
struct SingleCharacterTokenBuilder: CodeTokenBuilder {
    typealias Element = MarkdownTokenElement

    /// The character this builder matches.
    let character: Character
    /// The element assigned to the token emitted on a match.
    let element: MarkdownTokenElement

    init(character: Character, element: MarkdownTokenElement) {
        self.character = character
        self.element = element
    }

    /// Consumes one character if it equals `character`.
    ///
    /// - Parameter context: Tokenizer state; on a match `consuming` moves forward
    ///   by one character and a token is appended to `tokens`.
    /// - Returns: `true` on a match, otherwise `false` with the context unchanged.
    func build(from context: CodeTokenContext<MarkdownTokenElement>) -> Bool {
        let position = context.consuming
        guard position < context.source.endIndex,
              context.source[position] == character
        else {
            return false
        }

        let next = context.source.index(after: position)
        context.consuming = next
        context.tokens.append(
            MarkdownToken(element: element, text: String(character), range: position..<next)
        )
        return true
    }
}
32 changes: 32 additions & 0 deletions Sources/SwiftParser/Markdown/TokenBuilders/TextTokenBuilder.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import Foundation

/// Emits a `text` token for a run of characters that are not delimiters.
struct TextTokenBuilder: CodeTokenBuilder {
    typealias Element = MarkdownTokenElement

    /// Characters that terminate a text run: punctuation plus whitespace.
    ///
    /// NOTE(review): "$" ends a text run, but no builder visible in this change
    /// emits a token for it — confirm it is handled elsewhere.
    /// NOTE(review): if the source is a Swift `String`, a CR+LF pair is the single
    /// `Character` "\r\n", which is not in this set and is therefore absorbed into
    /// text — confirm that is intended.
    private static let delimiters: Set<Character> = [
        "#", "*", "_", "`", "-", "+", "=", "~", "^", "|", ":", ";", "!", "?", ".", ",",
        ">", "<", "&", "\\", "/", "\"", "'", "[", "]", "(", ")", "{", "}", "$",
        " ", "\t", "\n", "\r"
    ]

    /// Consumes characters from `context.consuming` up to the next delimiter or
    /// the end of the source.
    ///
    /// - Parameter context: Tokenizer state; on success `consuming` is advanced to
    ///   the stop position and a text token is appended to `tokens`.
    /// - Returns: `true` if at least one character was consumed, otherwise `false`
    ///   with the context unchanged.
    func build(from context: CodeTokenContext<MarkdownTokenElement>) -> Bool {
        let start = context.consuming
        guard start < context.source.endIndex else { return false }

        let stop = context.source[start...].firstIndex(where: isSpecial)
            ?? context.source.endIndex
        guard stop > start else { return false }

        let range = start..<stop
        let text = String(context.source[range])
        context.consuming = stop
        context.tokens.append(MarkdownToken.text(text, at: range))
        return true
    }

    /// Reports whether `char` terminates a text run.
    private func isSpecial(_ char: Character) -> Bool {
        Self.delimiters.contains(char)
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import Foundation

/// Emits whitespace tokens: a single space or tab character, or a line ending.
struct WhitespaceTokenBuilder: CodeTokenBuilder {
    typealias Element = MarkdownTokenElement

    /// The character matched when this builder is not handling line endings.
    private let character: Character
    /// The element assigned to emitted tokens; `.newline` and `.carriageReturn`
    /// switch the builder into line-ending mode.
    private let element: MarkdownTokenElement

    init(character: Character, element: MarkdownTokenElement) {
        self.character = character
        self.element = element
    }

    /// Consumes one whitespace unit at `context.consuming`.
    ///
    /// Builders configured with `.newline` or `.carriageReturn` recognize any line
    /// ending (LF, CRLF, or a lone CR); all others match only `character`.
    ///
    /// - Parameter context: Tokenizer state; on a match `consuming` is advanced and
    ///   one token is appended to `tokens`.
    /// - Returns: `true` on a match, otherwise `false` with the context unchanged.
    func build(from context: CodeTokenContext<MarkdownTokenElement>) -> Bool {
        guard context.consuming < context.source.endIndex else { return false }
        if element == .newline || element == .carriageReturn {
            return buildNewline(from: context)
        }
        guard context.source[context.consuming] == character else { return false }

        let start = context.consuming
        let next = context.source.index(after: start)
        context.consuming = next
        context.tokens.append(
            MarkdownToken(element: element, text: String(character), range: start..<next)
        )
        return true
    }

    /// Consumes a line ending at `context.consuming`.
    ///
    /// LF and CRLF produce a newline token; a CR not followed by LF produces a
    /// `.carriageReturn` token. The caller guarantees `consuming` is in bounds.
    private func buildNewline(from context: CodeTokenContext<MarkdownTokenElement>) -> Bool {
        let index = context.consuming
        let next = context.source.index(after: index)

        switch context.source[index] {
        case "\n", "\r\n":
            // In a Swift String, CR followed by LF forms the single Character
            // "\r\n", which equals neither "\n" nor "\r"; it must be matched
            // explicitly or CRLF line endings are never recognized.
            context.consuming = next
            context.tokens.append(MarkdownToken.newline(at: index..<next))
            return true

        case "\r":
            // Kept for sources that yield CR and LF as separate elements.
            if next < context.source.endIndex && context.source[next] == "\n" {
                let afterLineFeed = context.source.index(after: next)
                context.consuming = afterLineFeed
                context.tokens.append(MarkdownToken.newline(at: index..<afterLineFeed))
            } else {
                context.consuming = next
                context.tokens.append(
                    MarkdownToken(element: .carriageReturn, text: "\r", range: index..<next)
                )
            }
            return true

        default:
            return false
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import XCTest
@testable import SwiftParser

/// Smoke tests for tokenizing Markdown with the default `MarkdownLanguage` setup.
final class CodeTokenizerBasicTests: XCTestCase {
    // Implicitly unwrapped: assigned in `setUp` before every test and cleared in
    // `tearDown`.
    private var tokenizer: CodeTokenizer<MarkdownNodeElement, MarkdownTokenElement>!

    override func setUp() {
        super.setUp()
        let language = MarkdownLanguage()
        tokenizer = CodeTokenizer(language: language)
    }

    override func tearDown() {
        tokenizer = nil
        super.tearDown()
    }

    /// A lone "#" yields exactly one `.hash` token.
    func testSingleCharacterToken() throws {
        let tokens = tokenizer.tokenize("#")
        XCTAssertEqual(tokens.count, 1)
        // Unwrap instead of indexing so an empty result fails the test rather than
        // trapping the whole test process.
        let first = try XCTUnwrap(tokens.first)
        XCTAssertEqual(first.element, .hash)
    }

    /// A single-backtick span is emitted as one inline-code token that includes
    /// its delimiters.
    func testInlineCode() throws {
        let tokens = tokenizer.tokenize("`code`")
        let first = try XCTUnwrap(tokens.first)
        XCTAssertEqual(first.element, .inlineCode)
        XCTAssertEqual(first.text, "`code`")
    }
}