- tiny
- independent json reader
- line-by-line parsing
- serializable parser state
- fast
// a syntax is a regex pattern to match (begin/end, begin/while or a simple match)
struct syntax_t {
regex_t begin
regex_t end
regex_t match
// regexes may find the start or the end of a region
// or it may be a simple match
// when a match is found, its region may be assigned a scope
string scope_name
// or it may proceed to match other patterns
syntax_t patterns[]
// a syntax therefore contains a tree structure of syntaxes
// regex matches may capture regions and assign scopes to them
regex_match_t captures
// a syntax may also be an 'include' which references another defined syntax
syntax_t *include
};
// parsing a buffer requires 1. stack of parser states; and 2. a buffer range
// the stack of parser states at a minimum includes a syntax.
// it may also include matched results for each syntax
top = stack_top()
start = buffer_start
end = buffer_end
do {
if (top.patterns) {
pattern_match = match_first_pattern()
}
if (top.end) {
// match_end could consider captures from match_begin
end_match = match_end()
}
if (top.while) {
while_match = match_while()
}
if (top.while && !while_match) {
// a begin/while pattern exits when while_match fails
stack_pop()
} else if (end_match && (!pattern_match || pattern_match.first <= end_match.first )) {
// pattern matches are prioritized over ending matches
pattern_match = end_match
start = pattern_match.first
end = pattern_match.last
// when an end is found for the top syntax
// captures are processed. it may expand a scope (keyword.$1)
process_captures()
// the top syntax is discarded
stack_pop()
top = stack_top()
} else {
if (!pattern_match) break
if (pattern_match == begin_pattern) {
// when a begin is found
process_captures()
// it is added to the stack
// and its patterns will be considered on the next loop,
// until the syntax is discarded at end
stack_push(pattern_match)
top = stack_top()
} else if (pattern_match == match_pattern) {
// a simple match processes only captures
process_captures()
}
}
position = end
} while(true)
process_captures(syntax, captures)
{
// assign scopes to captures
// can be recursive if it contains matches
if (captures.pattern) {
process_captures()
}
}
- improve scope resolution
https://macromates.com/manual/en/language_grammars https://www.apeth.com/nonblog/stories/textmatebundle.html