1+ import CodeParserCore
2+ import Foundation
3+
/// CommonMark block-structure parser.
///
/// Splits the pending tokens into lines and runs every line through the four
/// steps of the CommonMark block phase:
/// 1. Check which open blocks continue on this line.
/// 2. Close the blocks that cannot continue.
/// 3. Open new blocks as needed.
/// 4. Add the remaining content to the current open block.
public class CommonMarkBlockParser: CodeNodeBuilder {
    public typealias Node = MarkdownNodeElement
    public typealias Token = MarkdownTokenElement

    /// Block builders in ascending `priority` order (lower number = tried first).
    private let builders: [CommonMarkBlockBuilder]

    /// Creates a parser that dispatches to the given block builders.
    ///
    /// - Parameter builders: Builders in any order; they are sorted here so
    ///   that a lower `priority` value is consulted before a higher one.
    public init(builders: [CommonMarkBlockBuilder]) {
        self.builders = builders.sorted { $0.priority < $1.priority }
    }

    /// Parses every token from `context.consuming` to the end of `context.tokens`.
    ///
    /// - Parameter context: Construction context; `current` is moved as blocks
    ///   are opened and closed, and `consuming` is advanced to the token count.
    /// - Returns: `false` when there are no tokens left to consume or no line
    ///   could be extracted; `true` once all extracted lines were processed.
    public func build(from context: inout CodeConstructContext<Node, Token>) -> Bool {
        guard context.consuming < context.tokens.count else { return false }

        let lines = extractLines(from: context)
        guard !lines.isEmpty else { return false }

        for line in lines {
            processLine(line, context: &context)
        }

        // Every line was handed to `processLine`, so all tokens count as consumed.
        context.consuming = context.tokens.count
        return true
    }

    /// Runs a single line through the four block-phase steps.
    ///
    /// Returns without doing anything when `context.state` is not a
    /// `MarkdownConstructState`.
    private func processLine(
        _ line: [any CodeToken<MarkdownTokenElement>],
        context: inout CodeConstructContext<Node, Token>
    ) {
        guard let state = context.state as? MarkdownConstructState else { return }

        // Each line starts from a clean per-line state.
        state.position = 0
        state.isPartialLine = false

        // Step 1: check continuation of the open blocks. `collectOpenBlocks`
        // yields innermost-first, so the reversed walk goes from the outermost
        // block down to the innermost and stops at the first block whose
        // builder refuses the line. Blocks without a builder are skipped.
        let openBlocks = collectOpenBlocks(from: context.current)
        var deepestContinued: MarkdownNodeBase?

        for block in openBlocks.reversed() {
            guard let builder = findBuilder(for: block) else { continue }
            guard builder.canContinue(block: block, line: line, state: state) else { break }
            deepestContinued = block
            _ = builder.processLine(for: block, line: line, state: state, context: &context)
        }

        // Step 2: close everything nested below the deepest continued block.
        closeBlocksAfter(deepestContinued, in: openBlocks, context: &context)

        // Step 3: try to open a new block if continuation left tokens unprocessed.
        if !isLineFullyProcessed(line, state: state) {
            tryStartNewBlocks(line, context: &context, state: state)
        }

        // Step 4: whatever is still unprocessed falls back to the current block.
        if !isLineFullyProcessed(line, state: state) {
            addContentToCurrentBlock(line, context: &context, state: state)
        }
    }

    /// Collects the `MarkdownNodeBase` nodes on the parent chain of `current`.
    ///
    /// - Returns: The chain ordered innermost-first: `current` (if it is a
    ///   `MarkdownNodeBase`) comes first and the node without a parent last.
    private func collectOpenBlocks(from current: CodeNode<MarkdownNodeElement>) -> [MarkdownNodeBase] {
        var chain: [MarkdownNodeBase] = []
        var cursor: CodeNode<MarkdownNodeElement>? = current

        while let visited = cursor {
            if let block = visited as? MarkdownNodeBase {
                chain.append(block)
            }
            cursor = visited.parent
        }

        return chain
    }

    /// Returns the first builder (in priority order) whose `blockType` equals
    /// the element of `block`, or `nil` when no builder matches.
    private func findBuilder(for block: MarkdownNodeBase) -> CommonMarkBlockBuilder? {
        return builders.first { $0.blockType == block.element }
    }

    /// Closes the open blocks that did not continue on the current line by
    /// moving `context.current` up the tree.
    ///
    /// - Parameters:
    ///   - lastContinuedBlock: Deepest block that accepted the line, or `nil`
    ///     when no block continued.
    ///   - openBlocks: Open blocks ordered innermost-first, as produced by
    ///     `collectOpenBlocks(from:)`.
    ///   - context: Context whose `current` node is updated.
    private func closeBlocksAfter(
        _ lastContinuedBlock: MarkdownNodeBase?,
        in openBlocks: [MarkdownNodeBase],
        context: inout CodeConstructContext<Node, Token>
    ) {
        guard let lastContinued = lastContinuedBlock else {
            // Nothing continued: fall back to the document block, if one is open.
            if let documentBlock = openBlocks.first(where: { $0.element == .document }) {
                context.current = documentBlock as CodeNode<MarkdownNodeElement>
            }
            return
        }

        // Only the blocks nested inside `lastContinued` failed to continue.
        // Making `lastContinued` the current node closes exactly those. Its
        // ancestors matched the line and must stay open, so the walk must not
        // climb past it (the elements after it in `openBlocks` are ancestors).
        if openBlocks.contains(where: { $0 === lastContinued }) {
            context.current = lastContinued as CodeNode<MarkdownNodeElement>
        }
    }

    /// Asks the builders, in priority order, to open a new block for `line`.
    ///
    /// The first builder that both reports `canStart` and returns a block from
    /// `createBlock` wins: its block is appended to `context.current`, becomes
    /// the new current node, and receives the line. At most one block is
    /// opened per call.
    private func tryStartNewBlocks(
        _ line: [any CodeToken<MarkdownTokenElement>],
        context: inout CodeConstructContext<Node, Token>,
        state: MarkdownConstructState
    ) {
        for builder in builders where builder.canStart(line: line, state: state) {
            guard let newBlock = builder.createBlock(from: line, state: state, context: &context) else {
                continue
            }

            // Attach the new block under the current node and descend into it.
            context.current.append(newBlock as CodeNode<MarkdownNodeElement>)
            context.current = newBlock as CodeNode<MarkdownNodeElement>

            _ = builder.processLine(for: newBlock, line: line, state: state, context: &context)
            return
        }
    }

    /// Fallback for content no builder claimed: makes sure the current node is
    /// a paragraph.
    ///
    /// NOTE(review): this is a placeholder. The paragraph is created with the
    /// range of a throwaway one-character string, the tokens of `line` are not
    /// attached to it, and `state.position` is not advanced. A paragraph
    /// builder should take over this step.
    private func addContentToCurrentBlock(
        _ line: [any CodeToken<MarkdownTokenElement>],
        context: inout CodeConstructContext<Node, Token>,
        state: MarkdownConstructState
    ) {
        guard context.current.element != .paragraph else { return }

        let placeholder = " "
        let paragraph = ParagraphNode(range: placeholder.startIndex..<placeholder.endIndex)
        context.current.append(paragraph)
        context.current = paragraph
    }

    /// Returns `true` once `state.position` has reached the end of `line`.
    /// An empty line therefore always counts as fully processed.
    private func isLineFullyProcessed(
        _ line: [any CodeToken<MarkdownTokenElement>],
        state: MarkdownConstructState
    ) -> Bool {
        return state.position >= line.count
    }

    /// Splits the tokens from `context.consuming` onwards into lines.
    ///
    /// - A `.newline` token ends a line and is kept as the line's last token.
    /// - An `.eof` token stops the scan. A pending non-empty line is closed
    ///   with a synthetic `.newline` token built from the `.eof` token's text
    ///   and range, and an empty line is appended after it.
    /// - Tokens that follow the last `.newline` when no `.eof` is present are
    ///   returned as a final line without a trailing newline token, so they
    ///   are not silently dropped.
    private func extractLines(
        from context: CodeConstructContext<Node, Token>
    ) -> [[any CodeToken<MarkdownTokenElement>]] {
        var lines: [[any CodeToken<MarkdownTokenElement>]] = []
        var pending: [any CodeToken<MarkdownTokenElement>] = []
        var index = context.consuming

        while index < context.tokens.count {
            let token = context.tokens[index]

            if token.element == .eof {
                if !pending.isEmpty {
                    pending.append(MarkdownToken(element: .newline, text: token.text, range: token.range))
                    lines.append(pending)
                    pending = []
                }
                lines.append([])
                break
            }

            pending.append(token)
            if token.element == .newline {
                lines.append(pending)
                pending = []
            }
            index += 1
        }

        // The token stream ended without `.eof`: keep the unterminated last
        // line, because `build` marks every token as consumed afterwards.
        if !pending.isEmpty {
            lines.append(pending)
        }

        return lines
    }
}
0 commit comments