Skip to content

Commit

Permalink
Name the CST nodes (#710)
Browse files Browse the repository at this point in the history
Closes #703

I think this branch has reached a critical mass where the core
functionality is there and this needs tweaks/polishes that could
probably be done in subsequent PRs.

The gist of the change is that the RuleNode now has children of `type
NamedNode = (String, Node)`. The parsers now thread the expected
node/field names into the PG, which adds a `with_name` transformation
that assigns the relevant names to significant nodes in the
intermediate parse results; these are then folded into the CST using the
existing mechanism.

Trivia scanners are unnamed. The cursor still internally uses an unnamed
node, so as not to incur a cost when names are not needed, but a new
`CursorWithNames` wrapper is introduced that wraps the underlying cursor
and returns `(String, Node)` items.

Quite a few things have been fixed since the PoC, mostly around
delimited, flattened fields and now the recovered nodes also have names.
Dummy names were removed and only the desired "auto-generated" ones
remain, i.e. `item`, `separator`, `variant`.

There's still an open issue of how best to name/propagate the names to the
unreduced parse results recovered from the precedence parser (e.g. for
`2 * new`, the `MultiplicativeExpression` is not yet named), but since it's
only related to recovery, and only in the precedence case, we can punt on it
for now (especially since recovery isn't ideal yet around the precedence
parser).

Other than that, the next thing will be collecting the field names into
an enum to cut down the memory footprint of the CST.
  • Loading branch information
Xanewok committed Dec 19, 2023
1 parent f6cf521 commit 2025b6c
Show file tree
Hide file tree
Showing 340 changed files with 11,738 additions and 9,304 deletions.
5 changes: 5 additions & 0 deletions .changeset/chilled-berries-deliver.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@nomicfoundation/slang": minor
---

CST children nodes are now named
40 changes: 30 additions & 10 deletions crates/codegen/grammar/src/parser_definition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,21 @@ use std::rc::Rc;
use crate::visitor::{GrammarVisitor, Visitable};
use crate::{PrecedenceParserDefinitionRef, ScannerDefinitionRef, VersionQualityRange};

/// Pairs a value with a name; used to attach a name to a [`ParserDefinitionNode`].
///
/// The wrapper dereferences to the wrapped value, so methods of `T` can be
/// called directly on a `Named<T>`.
#[derive(Clone, Debug)]
pub struct Named<T> {
    /// The name attached to the wrapped value.
    pub name: String,
    /// The wrapped value itself.
    pub node: T,
}

impl<T> std::ops::Deref for Named<T> {
    type Target = T;

    /// Borrows the wrapped value, ignoring the name.
    fn deref(&self) -> &T {
        let Named { node, .. } = self;
        node
    }
}

pub trait ParserDefinition: Debug {
fn name(&self) -> &'static str;
fn node(&self) -> &ParserDefinitionNode;
Expand Down Expand Up @@ -39,17 +54,17 @@ impl Visitable for TriviaParserDefinitionRef {
pub enum ParserDefinitionNode {
Versioned(Box<Self>, Vec<VersionQualityRange>),
Optional(Box<Self>),
ZeroOrMore(Box<Self>),
OneOrMore(Box<Self>),
Sequence(Vec<Self>),
Choice(Vec<Self>),
ZeroOrMore(Named<Box<Self>>),
OneOrMore(Named<Box<Self>>),
Sequence(Vec<Named<Self>>),
Choice(Named<Vec<Self>>),
ScannerDefinition(ScannerDefinitionRef),
TriviaParserDefinition(TriviaParserDefinitionRef),
ParserDefinition(ParserDefinitionRef),
PrecedenceParserDefinition(PrecedenceParserDefinitionRef),
DelimitedBy(Box<Self>, Box<Self>, Box<Self>),
SeparatedBy(Box<Self>, Box<Self>),
TerminatedBy(Box<Self>, Box<Self>),
DelimitedBy(Named<Box<Self>>, Box<Self>, Named<Box<Self>>),
SeparatedBy(Named<Box<Self>>, Named<Box<Self>>),
TerminatedBy(Box<Self>, Named<Box<Self>>),
}

impl From<ScannerDefinitionRef> for ParserDefinitionNode {
Expand Down Expand Up @@ -82,10 +97,15 @@ impl Visitable for ParserDefinitionNode {
match self {
Self::Versioned(node, _)
| Self::Optional(node)
| Self::ZeroOrMore(node)
| Self::OneOrMore(node) => node.accept_visitor(visitor),
| Self::ZeroOrMore(Named { node, .. })
| Self::OneOrMore(Named { node, .. }) => node.accept_visitor(visitor),

Self::Sequence(nodes) | Self::Choice(nodes) => {
Self::Sequence(nodes) => {
for node in nodes {
node.accept_visitor(visitor);
}
}
Self::Choice(Named { node: nodes, .. }) => {
for node in nodes {
node.accept_visitor(visitor);
}
Expand Down
178 changes: 131 additions & 47 deletions crates/codegen/parser/generator/src/parser_definition.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use codegen_grammar::{
ParserDefinitionNode, ParserDefinitionRef, TriviaParserDefinitionRef, VersionQuality,
Named, ParserDefinitionNode, ParserDefinitionRef, TriviaParserDefinitionRef, VersionQuality,
VersionQualityRange,
};
use inflector::Inflector;
Expand Down Expand Up @@ -46,60 +46,73 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode {
}
}

Self::ZeroOrMore(node) => {
Self::ZeroOrMore(Named { name, node }) => {
let parser = node.to_parser_code(context_name, is_trivia);

let parser = if name.is_empty() {
parser
} else {
quote! {
#parser.with_name(#name)
}
};

quote! {
ZeroOrMoreHelper::run(input, |input| #parser)
}
}

Self::OneOrMore(node) => {
Self::OneOrMore(Named { name, node }) => {
let parser = node.to_parser_code(context_name, is_trivia);

let parser = if name.is_empty() {
parser
} else {
quote! {
#parser.with_name(#name)
}
};

quote! {
OneOrMoreHelper::run(input, |input| #parser)
}
}

Self::Sequence(nodes) => {
if nodes.len() == 1 {
nodes[0].to_parser_code(context_name, is_trivia)
} else {
let parsers = nodes
.iter()
.map(|node| {
let parser = node.to_parser_code(context_name, is_trivia);
node.applicable_version_quality_ranges()
.wrap_code(quote! { seq.elem(#parser)?; }, None)
})
.collect::<Vec<_>>();
quote! {
SequenceHelper::run(|mut seq| {
#(#parsers)*
seq.finish()
})
Self::Sequence(nodes) => match &nodes[..] {
[Named { name, node }] => {
let parser = node.to_parser_code(context_name, is_trivia);

if name.is_empty() {
parser
} else {
quote! {
#parser.with_name(#name)
}
}
}
}
nodes => make_sequence_versioned(nodes.iter().map(|Named { name, node }| {
(
node.to_parser_code(context_name, is_trivia),
name.clone(),
node.applicable_version_quality_ranges(),
)
})),
},

Self::Choice(nodes) => {
let parsers = nodes
.iter()
.map(|node| {
let parser = node.to_parser_code(context_name, is_trivia);
node.applicable_version_quality_ranges().wrap_code(
quote! {
let result = #parser;
choice.consider(input, result)?;
},
None,
)
})
.collect::<Vec<_>>();
quote! {
ChoiceHelper::run(input, |mut choice, input| {
#(#parsers)*
choice.finish(input)
})
Self::Choice(Named { name, node: nodes }) => {
let parser = make_choice_versioned(nodes.iter().map(|node| {
(
node.to_parser_code(context_name, is_trivia),
node.applicable_version_quality_ranges(),
)
}));

if name.is_empty() {
parser
} else {
quote! {
#parser.with_name(#name)
}
}
}

Expand Down Expand Up @@ -155,6 +168,8 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode {
}

Self::DelimitedBy(open, body, close) => {
let open_field_name = &open.name;
let close_field_name = &close.name;
let [open_delim, close_delim] = match (open.as_ref(), close.as_ref()) {
(
ParserDefinitionNode::ScannerDefinition(open, ..),
Expand Down Expand Up @@ -182,34 +197,39 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode {
let mut delim_guard = input.open_delim(TokenKind::#close_delim);
let input = delim_guard.ctx();

seq.elem(self.parse_token_with_trivia::<#lex_ctx>(input, TokenKind::#open_delim))?;
seq.elem_named(#open_field_name, self.parse_token_with_trivia::<#lex_ctx>(input, TokenKind::#open_delim))?;
#body_parser
seq.elem(self.parse_token_with_trivia::<#lex_ctx>(input, TokenKind::#close_delim))?;
seq.elem_named(#close_field_name, self.parse_token_with_trivia::<#lex_ctx>(input, TokenKind::#close_delim))?;
seq.finish()
})
}
}

Self::SeparatedBy(body, separator) => {
let separator_field_name = &separator.name;
let separator = match separator.as_ref() {
ParserDefinitionNode::ScannerDefinition(scanner, ..) => {
format_ident!("{name}", name = scanner.name())
}
_ => unreachable!("Only tokens are permitted as separators"),
};

let body_field_name = &body.name;
let parser = body.to_parser_code(context_name, is_trivia);

quote! {
SeparatedHelper::run::<_, #lex_ctx>(
input,
self,
|input| #parser,
|input| #parser.with_name(#body_field_name),
TokenKind::#separator,
#separator_field_name,
)
}
}
Self::TerminatedBy(body, terminator) => {
let terminator_field_name = &terminator.name;

let terminator = match terminator.as_ref() {
ParserDefinitionNode::ScannerDefinition(scanner, ..) => {
format_ident!("{name}", name = scanner.name())
Expand All @@ -220,7 +240,8 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode {
let parser = body.to_parser_code(context_name, is_trivia);
let body_parser = body.applicable_version_quality_ranges().wrap_code(
quote! {
seq.elem(#parser
seq.elem(
#parser
.recover_until_with_nested_delims::<_, #lex_ctx>(input,
self,
TokenKind::#terminator,
Expand All @@ -234,7 +255,10 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode {
quote! {
SequenceHelper::run(|mut seq| {
#body_parser
seq.elem(self.parse_token_with_trivia::<#lex_ctx>(input, TokenKind::#terminator))?;
seq.elem_named(
#terminator_field_name,
self.parse_token_with_trivia::<#lex_ctx>(input, TokenKind::#terminator)
)?;
seq.finish()
})
}
Expand All @@ -249,8 +273,10 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode {
}

ParserDefinitionNode::Optional(node)
| ParserDefinitionNode::ZeroOrMore(node)
| ParserDefinitionNode::OneOrMore(node) => node.applicable_version_quality_ranges(),
| ParserDefinitionNode::ZeroOrMore(Named { node, .. })
| ParserDefinitionNode::OneOrMore(Named { node, .. }) => {
node.applicable_version_quality_ranges()
}

_ => vec![],
}
Expand Down Expand Up @@ -282,3 +308,61 @@ impl VersionQualityRangeVecExtensions for Vec<VersionQualityRange> {
}
}
}

/// Generates the code of a sequence parser from plain element parsers.
///
/// Each element is handed to [`make_sequence_versioned`] with an empty name
/// and an empty list of version ranges.
pub fn make_sequence(parsers: impl IntoIterator<Item = TokenStream>) -> TokenStream {
    let unnamed_unversioned = parsers
        .into_iter()
        .map(|element| (element, String::new(), Vec::new()));

    make_sequence_versioned(unnamed_unversioned)
}

/// Generates the code of a sequence parser from `(parser, name, versions)` triples.
///
/// For every element one statement is emitted inside a `SequenceHelper::run`
/// closure: `seq.elem(..)` when the name is empty, `seq.elem_named(name, ..)`
/// otherwise. Each statement is passed through `wrap_code` together with the
/// element's version ranges before being spliced into the closure body, which
/// ends with `seq.finish()`.
pub fn make_sequence_versioned(
    parsers: impl IntoIterator<Item = (TokenStream, String, Vec<VersionQualityRange>)>,
) -> TokenStream {
    let mut steps = Vec::new();

    for (element, field_name, versions) in parsers {
        // An empty name marks an element that should stay unnamed.
        let step = if field_name.is_empty() {
            quote! { seq.elem(#element)?; }
        } else {
            quote! { seq.elem_named(#field_name, #element)?; }
        };

        steps.push(versions.wrap_code(step, None));
    }

    quote! {
        SequenceHelper::run(|mut seq| {
            #(#steps)*
            seq.finish()
        })
    }
}

/// Generates the code of a choice parser from plain alternative parsers.
///
/// Each alternative is handed to `make_choice_versioned` with an empty list
/// of version ranges.
pub fn make_choice(parsers: impl IntoIterator<Item = TokenStream>) -> TokenStream {
    let unversioned = parsers
        .into_iter()
        .map(|alternative| (alternative, Vec::new()));

    make_choice_versioned(unversioned)
}

/// Generates the code of a choice parser from `(parser, versions)` pairs.
///
/// For every alternative a pair of statements is emitted inside a
/// `ChoiceHelper::run` closure: the parser is evaluated into `result`, which is
/// then offered to `choice.consider(input, result)`. Each pair is passed through
/// `wrap_code` together with the alternative's version ranges before being
/// spliced into the closure body, which ends with `choice.finish(input)`.
fn make_choice_versioned(
    parsers: impl IntoIterator<Item = (TokenStream, Vec<VersionQualityRange>)>,
) -> TokenStream {
    let mut alternatives = Vec::new();

    for (alternative, versions) in parsers {
        let attempt = quote! {
            let result = #alternative;
            choice.consider(input, result)?;
        };

        alternatives.push(versions.wrap_code(attempt, None));
    }

    quote! {
        ChoiceHelper::run(input, |mut choice, input| {
            #(#alternatives)*
            choice.finish(input)
        })
    }
}

0 comments on commit 2025b6c

Please sign in to comment.