Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
7f48124
Initial plan
Copilot Sep 1, 2025
6b33155
Initial attempts to fix blockquote + fenced code block interaction
Copilot Sep 1, 2025
a5db09b
Fix backslash hard line break - remove trailing backslash from content
Copilot Sep 1, 2025
e6b2219
Fix thematic break vs list precedence - add thematic break detection …
Copilot Sep 1, 2025
c306dc8
Design new CommonMark-compliant block builder architecture
Copilot Sep 1, 2025
e3164ab
Replace old MarkdownBlockBuilder with new CommonMark-compliant archit…
Copilot Sep 1, 2025
e6bd1fa
Complete migration: remove all adapters and old implementations
Copilot Sep 1, 2025
26c9ee4
Rename CommonMark to Markdown naming and implement inline builders wi…
Copilot Sep 1, 2025
6c68fd1
Implement fully pluggable block and inline builder architecture
Copilot Sep 1, 2025
f766c59
Refactor to pure dispatcher architecture without configuration system
Copilot Sep 1, 2025
787ace4
Implement pure dispatcher architecture with CommonMark algorithm dele…
Copilot Sep 1, 2025
b878c2e
Refactor to leverage CodeParserCore tokenizer with simple CodeNodeBui…
Copilot Sep 1, 2025
e0db2d7
Implement new CommonMark-compliant block builder architecture with pl…
Copilot Sep 1, 2025
b94c100
Delete Documents/MarkdownConfiguration.md
DongyuZhao Sep 1, 2025
964ed9d
Implement working CommonMark-compliant block parsing with pluggable a…
Copilot Sep 1, 2025
176ed91
Implement ATX headings and thematic breaks - 6 ATX and 10+ thematic b…
Copilot Sep 1, 2025
cdc1112
Add blockquotes, setext headings, and fenced code blocks - 10 additio…
Copilot Sep 1, 2025
edda2ec
Fix ATX heading space requirement - 9 additional tests passing (560 f…
Copilot Sep 1, 2025
48706b6
Add list item builder - 2 additional tests passing (558 failures down…
Copilot Sep 1, 2025
7014cfc
Implement token-based inline processing for ATX headings with escape …
Copilot Sep 1, 2025
5d9059a
Fix paragraph processing and block interruption logic
Copilot Sep 1, 2025
0e2b25e
Add inline processing to paragraph builder and continue architecture …
Copilot Sep 1, 2025
8fee478
Fix anti-pattern: implement token-based inline processing instead of …
Copilot Sep 1, 2025
2b2bd01
Improve emphasis processing: add strong emphasis support and proper d…
Copilot Sep 1, 2025
936f9fd
Implement comprehensive inline processing: add code spans, strikethro…
Copilot Sep 1, 2025
457faab
Fix emphasis delimiter stack algorithm: correct multiple of 3 rule an…
Copilot Sep 1, 2025
552ca51
Fix code span precedence and matching logic for proper CommonMark com…
Copilot Sep 3, 2025
2f3f45c
Fix line break processing in paragraphs for proper soft/hard line bre…
Copilot Sep 3, 2025
4f7d22d
Fix thematic break recognition by implementing token-based detection …
Copilot Sep 3, 2025
ff21719
Fix emphasis delimiter stack algorithm and code span line ending proc…
Copilot Sep 5, 2025
bca677b
Fix empty code span recognition (`` -> code(""))
Copilot Sep 5, 2025
02c92a9
Fix paragraph indentation handling - strip leading spaces per CommonM…
Copilot Sep 5, 2025
f828460
Fix soft and hard line break processing: implement proper tab expansi…
Copilot Sep 5, 2025
a0ae660
Fix fenced code block anti-patterns: implement proper token-based pro…
Copilot Sep 5, 2025
41a18a4
Fix block interruption logic: implement proper token-based fenced cod…
Copilot Sep 5, 2025
36feadd
Clean up context and state management: use AST as single source of truth
Copilot Sep 5, 2025
8ad9bb5
Simplify state management: remove redundant structures and clean up M…
Copilot Sep 5, 2025
72da257
Implement token-based processing with AST as single source of truth
Copilot Sep 5, 2025
866ea12
Add package-level indentation properties and implement recursive bloc…
Copilot Sep 5, 2025
bc42321
Refactor blockquote builder to eliminate coupling anti-pattern: consu…
Copilot Sep 5, 2025
360e7ed
Fix infinite loop in blockquote processing and add safety checks
Copilot Sep 5, 2025
4ed6172
Fix coupling anti-pattern in blockquote builder: implement proper tok…
Copilot Sep 5, 2025
006a478
Fix soft line break issue in blockquote test case and paragraph proce…
Copilot Sep 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 12 additions & 120 deletions Sources/CodeParserCollection/Markdown/MarkdownConstructState.swift
Original file line number Diff line number Diff line change
@@ -1,51 +1,25 @@
import CodeParserCore
import Foundation

/// Main construction state for Markdown language with line-based processing
/// Minimal construction state for Markdown language
/// Only contains state that cannot be derived from the AST (context.current)
public class MarkdownConstructState: CodeConstructState {
public typealias Node = MarkdownNodeElement
public typealias Token = MarkdownTokenElement

// Current token index in the line
public var position: Int = 0
// Flag indicates if the block builders should run another round on the same line.
public var refreshed: Bool = false
// Flag indicates if the current line is being reprocessed after partial consumption
public var isPartialLine: Bool = false

// Fenced code block state
public var openFence: OpenFenceInfo?

// HTML block state
public var openHTMLBlock: OpenHTMLBlockInfo?

/// Stack for nested list processing
public var listStack: [ListNode] = []
public var currentDefinitionList: DefinitionListNode?

/// Enhanced list context tracking for better indentation and nesting management
public var listContextStack: [ListContextInfo] = []

/// Indicates the last consumed line break formed a blank line (two or more consecutive newlines)
public var lastWasBlankLine: Bool = false

/// When a quoted blank line (`>\\n`) is seen inside a blockquote, the next quoted
/// content should start a new paragraph inside the same blockquote instead of
/// merging into the previous one.
public var pendingBlockquoteParagraphSplit: Bool = false

/// True when the previous quoted line (inside a blockquote) began with a token
/// that could start a block (e.g., `#`, `-`, `*`, `+`, number.). We use this to
/// prevent merging the next quoted line into the same paragraph, matching CommonMark
/// semantics where block-starting constructs introduce a new block.
public var prevBlockquoteLineWasBlockStart: Bool = false

/// Reference link definitions storage for resolving reference links
/// Key is normalized reference identifier (case-insensitive, whitespace collapsed)
/// Note: This cannot be derived from AST since reference definitions may appear
/// anywhere in the document and need to be available for link resolution
public var referenceDefinitions: [String: (url: String, title: String)] = [:]

/// Pending reference link definition being parsed across multiple lines
public var pendingReference: PendingReferenceDefinition?

/// Current line tokens being processed - builders can modify these
/// This allows builders to consume their part and leave remaining tokens for further processing
public var tokens: [any CodeToken<MarkdownTokenElement>] = []

/// Flag indicating if current line has been fully processed by a builder
/// When false, MarkdownBlockBuilder should continue processing the remaining tokens
public var currentLineProcessed: Bool = true

public init() {}

Expand All @@ -72,85 +46,3 @@ public class MarkdownConstructState: CodeConstructState {
.trimmingCharacters(in: .whitespacesAndNewlines)
}
}

/// Bookkeeping for a reference link definition whose parsing spans multiple lines.
public struct PendingReferenceDefinition {
    /// Identifier supplied when the pending definition was created.
    public let identifier: String
    /// The `ReferenceNode` associated with this pending definition.
    public let referenceNode: ReferenceNode
    /// Starts out `false`; presumably flipped once a destination has been parsed — confirm against the builder.
    public var hasDestination: Bool = false
    /// Starts out `false`; presumably flipped once a title has been parsed — confirm against the builder.
    public var hasTitle: Bool = false
    /// Tokens of the original line, kept so the line can fall back to a paragraph.
    public let originalLineTokens: [any CodeToken<MarkdownTokenElement>]

    /// Creates a pending definition with neither destination nor title recorded yet.
    public init(
        identifier: String,
        referenceNode: ReferenceNode,
        originalLineTokens: [any CodeToken<MarkdownTokenElement>]
    ) {
        self.originalLineTokens = originalLineTokens
        self.referenceNode = referenceNode
        self.identifier = identifier
    }
}

/// Describes a fenced code block that is currently open.
public struct OpenFenceInfo {
    /// Fence character, stored as a `String` (presumably "`" or "~" — confirm against the fence builder).
    public let character: String
    /// Length recorded for the fence (presumably the count of fence characters — confirm).
    public let length: Int
    /// Indentation recorded for the fence (units not visible here — confirm).
    public let indentation: Int
    /// The code block node associated with this open fence.
    public let codeBlock: CodeBlockNode

    /// Stores the four values unchanged.
    public init(
        character: String,
        length: Int,
        indentation: Int,
        codeBlock: CodeBlockNode
    ) {
        self.codeBlock = codeBlock
        self.indentation = indentation
        self.length = length
        self.character = character
    }
}

/// Describes an HTML block that is currently open.
public struct OpenHTMLBlockInfo {
    /// HTML block type (1-7).
    public let type: Int
    /// The string that ends this block, or `nil` when none is recorded.
    public let endCondition: String?
    /// The HTML block node associated with this open block.
    public let htmlBlock: HTMLBlockNode

    /// Stores the three values unchanged; `type` is not range-checked here.
    public init(
        type: Int,
        endCondition: String?,
        htmlBlock: HTMLBlockNode
    ) {
        self.htmlBlock = htmlBlock
        self.endCondition = endCondition
        self.type = type
    }
}

/// Result record describing a detected HTML block type.
public struct HTMLBlockTypeInfo {
    /// Numeric HTML block type.
    public let type: Int
    /// Name recorded for the detected block (exact meaning is defined by the detector — confirm).
    public let name: String
    /// Flag supplied by the detector; by its name, whether the block also closed on the line where it opened.
    public let closedOnSameLine: Bool
    /// Optional end condition for the block; `nil` when none was supplied.
    public let endCondition: String?

    /// Stores the values unchanged. `endCondition` defaults to `nil`.
    public init(
        type: Int,
        name: String,
        closedOnSameLine: Bool,
        endCondition: String? = nil
    ) {
        self.endCondition = endCondition
        self.closedOnSameLine = closedOnSameLine
        self.name = name
        self.type = type
    }
}

/// Per-list context record used for nesting and indentation management.
public struct ListContextInfo {
    /// The list node this context describes.
    public let list: ListNode
    /// The list item that contains this list; `nil` for top-level lists.
    public let parentListItem: ListItemNode?
    /// Calculated indentation level for content in this list context.
    public let contentIndent: Int
    /// Nesting level: 1 for top-level, 2 for the first nested list, and so on.
    public let level: Int
    /// Marker type, used for compatibility checking.
    public let markerType: String

    /// Stores the five values unchanged.
    public init(
        list: ListNode,
        parentListItem: ListItemNode?,
        contentIndent: Int,
        level: Int,
        markerType: String
    ) {
        self.markerType = markerType
        self.level = level
        self.contentIndent = contentIndent
        self.parentListItem = parentListItem
        self.list = list
    }
}
41 changes: 35 additions & 6 deletions Sources/CodeParserCollection/Markdown/MarkdownNodes.swift
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,16 @@ public class DocumentNode: MarkdownNodeBase {
}

// MARK: - Block Elements
public class ParagraphNode: MarkdownNodeBase {
public class ParagraphNode: MarkdownNodeBase, MarkdownBlockNode {
public var blockType: String { "paragraph" }

public init(range: Range<String.Index>) {
super.init(element: .paragraph)
}
}

public class HeaderNode: MarkdownNodeBase {
public class HeaderNode: MarkdownNodeBase, MarkdownBlockNode {
public var blockType: String { "heading" }
public var level: Int

public init(level: Int) {
Expand All @@ -130,7 +133,8 @@ public class HeaderNode: MarkdownNodeBase {
}
}

public class ThematicBreakNode: MarkdownNodeBase {
public class ThematicBreakNode: MarkdownNodeBase, MarkdownBlockNode {
public var blockType: String { "thematic_break" }
public var marker: String

public init(marker: String = "---") {
Expand All @@ -144,8 +148,14 @@ public class ThematicBreakNode: MarkdownNodeBase {
}
}

public class BlockquoteNode: MarkdownNodeBase {
public class BlockquoteNode: MarkdownNodeBase, MarkdownBlockNode {
public var blockType: String { "blockquote" }
public var level: Int

// Package-level indentation properties for nested block parsing
package var indent: Int = 0 // Number of spaces before the '>' marker
package var markerColumn: Int = 0 // Column position of the '>' marker
package var contentColumn: Int = 0 // Column position where content starts after '> '

public init(level: Int = 1) {
self.level = level
Expand Down Expand Up @@ -198,11 +208,17 @@ public class UnorderedListNode: ListNode {
}
}

public class ListItemNode: MarkdownNodeBase {
public class ListItemNode: MarkdownNodeBase, MarkdownBlockNode {
public var blockType: String { "list_item" }
public var marker: String
// indentation before marker and content indent column for continuation
public var markerIndent: Int = 0
public var contentIndent: Int = 0

// Package-level properties for enhanced nested block parsing
package var markerColumn: Int = 0 // Exact column position of the marker
package var contentColumn: Int = 0 // Exact column position where content starts
package var markerLength: Int = 0 // Length of the marker (e.g., "1." = 2, "-" = 1)

public init(marker: String) {
self.marker = marker
Expand All @@ -215,9 +231,13 @@ public class ListItemNode: MarkdownNodeBase {
}
}

public class CodeBlockNode: MarkdownNodeBase {
public class CodeBlockNode: MarkdownNodeBase, MarkdownBlockNode {
public var blockType: String { "code_block" }
public var language: String?
public var source: String

// Package-level indentation properties for nested block parsing
package var indent: Int = 0 // Number of spaces before the code block

public init(source: String, language: String? = nil) {
self.language = language
Expand Down Expand Up @@ -635,3 +655,12 @@ public class ContentNode: MarkdownNodeBase {
super.init(element: .content)
}
}

// MARK: - Type Aliases for Block Builders
// `Markdown`-prefixed alternative names for existing node classes, listed alphabetically.
// Each alias names the same type as its target, so either spelling can be used.
public typealias MarkdownBlockquote = BlockquoteNode
public typealias MarkdownHeading = HeaderNode
public typealias MarkdownLineBreak = LineBreakNode
public typealias MarkdownListItem = ListItemNode
public typealias MarkdownParagraph = ParagraphNode
public typealias MarkdownText = TextNode
public typealias MarkdownThematicBreak = ThematicBreakNode
Loading
Loading