Skip to content

Commit

Permalink
Fix parsing ambiguous call options (#862)
Browse files Browse the repository at this point in the history
Based on #853 

Closes #850
Closes #861

---------

Co-authored-by: OmarTawfik <15987992+OmarTawfik@users.noreply.github.com>
  • Loading branch information
Xanewok and OmarTawfik committed Mar 8, 2024
1 parent a9bd8da commit 5e37ea0
Show file tree
Hide file tree
Showing 66 changed files with 1,249 additions and 930 deletions.
5 changes: 5 additions & 0 deletions .changeset/shy-pets-attack.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@nomicfoundation/slang": patch
---

Allow call options as a postfix expression
13 changes: 7 additions & 6 deletions crates/codegen/grammar/src/constructor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ use codegen_language_definition::model::{self, FieldsErrorRecovery, Identifier,
use indexmap::IndexMap;

use crate::{
Grammar, GrammarElement, KeywordScannerDefinition, KeywordScannerDefinitionNode,
KeywordScannerDefinitionVersionedNode, Labeled, ParserDefinition, ParserDefinitionNode,
PrecedenceOperatorModel, PrecedenceParserDefinition, PrecedenceParserDefinitionNode,
ScannerDefinition, ScannerDefinitionNode, TriviaParserDefinition, VersionQuality,
VersionQualityRange,
DelimitedRecoveryTokenThreshold, Grammar, GrammarElement, KeywordScannerDefinition,
KeywordScannerDefinitionNode, KeywordScannerDefinitionVersionedNode, Labeled, ParserDefinition,
ParserDefinitionNode, PrecedenceOperatorModel, PrecedenceParserDefinition,
PrecedenceParserDefinitionNode, ScannerDefinition, ScannerDefinitionNode,
TriviaParserDefinition, VersionQuality, VersionQualityRange,
};

/// Materializes the DSL v2 model ([`model::Language`]) into [`Grammar`].
Expand Down Expand Up @@ -610,7 +610,8 @@ fn resolve_sequence_like(
let open = delims.next().unwrap();
let close = delims.next().unwrap();

ParserDefinitionNode::DelimitedBy(open, Box::new(delimited_body), close)
let threshold = DelimitedRecoveryTokenThreshold::from(delimiters);
ParserDefinitionNode::DelimitedBy(open, Box::new(delimited_body), close, threshold)
};
// Replace with a new delimited node
fields.insert(
Expand Down
30 changes: 28 additions & 2 deletions crates/codegen/grammar/src/parser_definition.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use std::fmt::Debug;
use std::rc::Rc;

use codegen_language_definition::model;

use crate::visitor::{GrammarVisitor, Visitable};
use crate::{
KeywordScannerDefinitionRef, PrecedenceParserDefinitionRef, ScannerDefinitionRef,
Expand Down Expand Up @@ -53,6 +55,25 @@ impl Visitable for TriviaParserDefinitionRef {
}
}

/// How many tokens have to be matched to trigger the error recovery.
/// For ambiguous syntaxes this needs to be set to at least N, where N
/// is the token lookahead required to disambiguate the syntax.
///
/// By default, we assume no lookahead (0) is required to recover from
/// unrecognized body between delimiters, so it's always triggered.
#[derive(Clone, Debug, Default)]
pub struct DelimitedRecoveryTokenThreshold(pub u8);

impl From<model::FieldDelimiters> for DelimitedRecoveryTokenThreshold {
    /// Materializes the DSL-level delimiter configuration into the
    /// grammar-level recovery threshold, falling back to the default (0,
    /// i.e. always recover) when the DSL does not specify one.
    fn from(delimiters: model::FieldDelimiters) -> Self {
        Self(
            delimiters
                .tokens_matched_acceptance_threshold
                .unwrap_or(Self::default().0),
        )
    }
}

#[derive(Clone, Debug)]
pub enum ParserDefinitionNode {
Versioned(Box<Self>, Vec<VersionQualityRange>),
Expand All @@ -66,7 +87,12 @@ pub enum ParserDefinitionNode {
TriviaParserDefinition(TriviaParserDefinitionRef),
ParserDefinition(ParserDefinitionRef),
PrecedenceParserDefinition(PrecedenceParserDefinitionRef),
DelimitedBy(Labeled<Box<Self>>, Box<Self>, Labeled<Box<Self>>),
DelimitedBy(
Labeled<Box<Self>>,
Box<Self>,
Labeled<Box<Self>>,
DelimitedRecoveryTokenThreshold,
),
SeparatedBy(Labeled<Box<Self>>, Labeled<Box<Self>>),
TerminatedBy(Box<Self>, Labeled<Box<Self>>),
}
Expand Down Expand Up @@ -115,7 +141,7 @@ impl Visitable for ParserDefinitionNode {
}
}

Self::DelimitedBy(open, body, close) => {
Self::DelimitedBy(open, body, close, ..) => {
open.accept_visitor(visitor);
body.accept_visitor(visitor);
close.accept_visitor(visitor);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,15 @@ impl ParseInputTokens for usize {
}
}

impl ParseInputTokens for u8 {
    /// Parses the next integer literal from the input stream and narrows
    /// it to `u8`; `base10_parse` reports an error for out-of-range values.
    fn parse_value(input: ParseStream<'_>, _: &mut ErrorsCollection) -> Result<Self> {
        let parsed = ParseHelpers::syn::<syn::LitInt>(input)?;

        Ok(parsed.base10_parse::<u8>()?)
    }
}

impl<T: ParseInputTokens> ParseInputTokens for Vec<T> {
fn parse_value(input: ParseStream<'_>, errors: &mut ErrorsCollection) -> Result<Self> {
Ok(ParseHelpers::sequence(input, errors))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,16 @@ impl WriteOutputTokens for usize {
}
}

impl WriteOutputTokens for u8 {
    /// Emits this `u8` as a suffixed literal (e.g. `2u8`) followed by
    /// `.into()`, letting the generated code convert to the target type.
    fn write_output_tokens(&self) -> TokenStream {
        let literal = Literal::u8_suffixed(*self);

        quote! { #literal.into() }
    }
}

impl<T: WriteOutputTokens> WriteOutputTokens for Vec<T> {
fn write_output_tokens(&self) -> TokenStream {
let items = self.iter().map(T::write_output_tokens);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@ pub struct FieldsErrorRecovery {
pub struct FieldDelimiters {
/// Grammar token that opens the delimited group (e.g. `open_brace`).
pub open: Identifier,
/// Grammar token that closes the delimited group (e.g. `close_brace`).
pub close: Identifier,
/// How many tokens have to be matched to trigger the error recovery.
/// For ambiguous syntaxes this needs to be set to at least N, where N
/// is the token lookahead required to disambiguate the syntax.
///
/// By default, we assume no lookahead (0) is required to recover from
/// unrecognized body between delimiters, so it's always triggered.
pub tokens_matched_acceptance_threshold: Option<u8>,
}

#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ fn get_spanned_type(input: Type) -> Type {
}

// External types should also be wrapped in 'Spanned<T>':
"bool" | "char" | "PathBuf" | "String" | "Version" => {
"bool" | "char" | "PathBuf" | "String" | "u8" | "Version" => {
parse_quote! {
crate::internals::Spanned<#input>
}
Expand Down
8 changes: 5 additions & 3 deletions crates/codegen/parser/generator/src/parser_definition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode {
quote! { self.#function_name(input) }
}

Self::DelimitedBy(open, body, close) => {
Self::DelimitedBy(open, body, close, threshold) => {
let open_label = format_ident!("{}", open.label.to_pascal_case());
let close_label = format_ident!("{}", close.label.to_pascal_case());
let [open_delim, close_delim] = match (open.as_ref(), close.as_ref()) {
Expand All @@ -201,6 +201,7 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode {
) => [open, close].map(|scanner| format_ident!("{}", scanner.name())),
_ => unreachable!("Only tokens are permitted as delimiters"),
};
let threshold = threshold.0;

let parser = body.to_parser_code(context_name, is_trivia);
let body_parser = body.applicable_version_quality_ranges().wrap_code(
Expand All @@ -209,7 +210,7 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode {
.recover_until_with_nested_delims::<_, #lex_ctx>(input,
self,
TokenKind::#close_delim,
RecoverFromNoMatch::Yes,
TokenAcceptanceThreshold(#threshold),
)
)?;
},
Expand Down Expand Up @@ -275,7 +276,8 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode {
.recover_until_with_nested_delims::<_, #lex_ctx>(input,
self,
TokenKind::#terminator,
RecoverFromNoMatch::No,
// Requires at least a partial match not to risk misparsing
TokenAcceptanceThreshold(1u8),
)
)?;
},
Expand Down
2 changes: 1 addition & 1 deletion crates/codegen/parser/generator/src/rust_generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ impl GrammarVisitor for RustGenerator {
}

// Collect delimiters for each context
ParserDefinitionNode::DelimitedBy(open, _, close) => {
ParserDefinitionNode::DelimitedBy(open, _, close, ..) => {
self.labels.insert(open.label.clone());
self.labels.insert(close.label.clone());

Expand Down
2 changes: 1 addition & 1 deletion crates/codegen/parser/runtime/src/parser_support/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ pub(crate) use parser_result::ParserResult;
#[allow(unused_imports)]
pub(crate) use precedence_helper::PrecedenceHelper;
#[allow(unused_imports)]
pub(crate) use recovery::RecoverFromNoMatch;
pub(crate) use recovery::TokenAcceptanceThreshold;
#[allow(unused_imports)]
pub(crate) use repetition_helper::{OneOrMoreHelper, ZeroOrMoreHelper};
#[allow(unused_imports)]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::ops::ControlFlow;

use crate::cst::{self, LabeledNode};
use crate::cst::{self, LabeledNode, Node};
use crate::kinds::{NodeLabel, RuleKind, TokenKind};
use crate::text_index::TextIndex;

Expand Down Expand Up @@ -201,6 +201,33 @@ impl IncompleteMatch {
expected_tokens,
}
}

/// Whether this prefix-matched at least `n` (non-skipped) significant tokens.
pub fn matches_at_least_n_tokens(&self, n: u8) -> bool {
    let wanted = usize::from(n);

    // Walk every node reachable from the matched prefix and count only
    // significant tokens, i.e. neither SKIPPED nor trivia. `take(wanted)`
    // short-circuits the traversal as soon as enough tokens have been
    // seen, so we never walk the whole tree unnecessarily.
    self.nodes
        .iter()
        .flat_map(|node| node.cursor_with_offset(TextIndex::ZERO))
        .filter(|node| {
            matches!(
                node,
                Node::Token(tok) if tok.kind != TokenKind::SKIPPED && !tok.kind.is_trivia()
            )
        })
        .take(wanted)
        .count()
        >= wanted
}
}

#[derive(PartialEq, Eq, Clone, Debug)]
Expand Down
33 changes: 15 additions & 18 deletions crates/codegen/parser/runtime/src/parser_support/recovery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,11 @@ use crate::parser_support::parser_result::SkippedUntil;
use crate::parser_support::ParserResult;
use crate::text_index::{TextRange, TextRangeExtensions as _};

/// An explicit parameter for the [`ParserResult::recover_until_with_nested_delims`] method.
/// How many tokens have to be matched to trigger the error recovery.
/// For ambiguous syntaxes this needs to be set to at least N, where N
/// is the token lookahead required to disambiguate the syntax.
#[derive(Clone, Copy)]
pub(crate) enum RecoverFromNoMatch {
Yes,
No,
}

impl RecoverFromNoMatch {
pub fn as_bool(self) -> bool {
matches!(self, RecoverFromNoMatch::Yes)
}
}
pub(crate) struct TokenAcceptanceThreshold(pub(crate) u8);

fn opt_parse(
input: &mut ParserContext<'_>,
Expand Down Expand Up @@ -46,7 +39,7 @@ impl ParserResult {
input: &mut ParserContext<'_>,
lexer: &L,
expected: TokenKind,
recover_from_no_match: RecoverFromNoMatch,
acceptance_threshold: TokenAcceptanceThreshold,
) -> ParserResult {
enum ParseResultKind {
Match,
Expand All @@ -57,11 +50,15 @@ impl ParserResult {
let before_recovery = input.position();

let (mut nodes, mut expected_tokens, result_kind) = match self {
ParserResult::IncompleteMatch(result) => (
result.nodes,
result.expected_tokens,
ParseResultKind::Incomplete,
),
ParserResult::IncompleteMatch(result)
if result.matches_at_least_n_tokens(acceptance_threshold.0) =>
{
(
result.nodes,
result.expected_tokens,
ParseResultKind::Incomplete,
)
}
ParserResult::Match(result)
if lexer
.peek_token_with_trivia::<LexCtx>(input)
Expand All @@ -70,7 +67,7 @@ impl ParserResult {
{
(result.nodes, result.expected_tokens, ParseResultKind::Match)
}
ParserResult::NoMatch(result) if recover_from_no_match.as_bool() => {
ParserResult::NoMatch(result) if acceptance_threshold.0 == 0 => {
(vec![], result.expected_tokens, ParseResultKind::NoMatch)
}
// No need to recover, so just return as-is.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use crate::napi_interface::parse_output::ParseOutput as NAPIParseOutput;
use crate::parse_output::ParseOutput;
use crate::parser_support::{
ChoiceHelper, OneOrMoreHelper, OptionalHelper, ParserContext, ParserFunction, ParserResult,
PrecedenceHelper, RecoverFromNoMatch, SeparatedHelper, SequenceHelper, ZeroOrMoreHelper,
PrecedenceHelper, SeparatedHelper, SequenceHelper, TokenAcceptanceThreshold, ZeroOrMoreHelper,
};

#[derive(Debug)]
Expand Down
54 changes: 30 additions & 24 deletions crates/solidity/inputs/language/src/definition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3378,12 +3378,29 @@ codegen_language_macros::compile!(Language(
name = FunctionCallExpression,
operators = [PrecedenceOperator(
model = Postfix,
fields = (arguments = Required(ArgumentsDeclaration))
)]
),
PrecedenceExpression(
name = CallOptionsExpression,
operators = [PrecedenceOperator(
model = Postfix,
enabled = From("0.6.2"),
error_recovery = FieldsErrorRecovery(
delimiters = FieldDelimiters(
open = open_brace,
close = close_brace,
// NOTE: Despite `CallOptions` requiring at least one element,
// we can only recover if we found at least two tokens (`ident:`)
// in the body, as this may be otherwise ambiguous with
// `try <EXPR> { func() } catch {}`.
tokens_matched_acceptance_threshold = 2
)
),
fields = (
options = Optional(
reference = FunctionCallOptions,
enabled = From("0.6.2")
),
arguments = Required(ArgumentsDeclaration)
open_brace = Required(OpenBrace),
options = Required(CallOptions),
close_brace = Required(CloseBrace)
)
)]
),
Expand Down Expand Up @@ -3456,20 +3473,6 @@ codegen_language_macros::compile!(Language(
Topic(
title = "Function Calls",
items = [
Enum(
name = FunctionCallOptions,
enabled = From("0.6.2"),
variants = [
EnumVariant(
reference = NamedArgumentGroups,
enabled = Range(from = "0.6.2", till = "0.8.0")
),
EnumVariant(
reference = NamedArgumentGroup,
enabled = From("0.8.0")
)
]
),
Enum(
name = ArgumentsDeclaration,
variants = [
Expand Down Expand Up @@ -3507,11 +3510,6 @@ codegen_language_macros::compile!(Language(
close_paren = Required(CloseParen)
)
),
Repeated(
name = NamedArgumentGroups,
reference = NamedArgumentGroup,
enabled = Range(from = "0.6.2", till = "0.8.0")
),
Struct(
name = NamedArgumentGroup,
error_recovery = FieldsErrorRecovery(
Expand All @@ -3530,6 +3528,14 @@ codegen_language_macros::compile!(Language(
separator = Comma,
allow_empty = true
),
Separated(
name = CallOptions,
reference = NamedArgument,
separator = Comma,
enabled = From("0.6.2"),
// These cannot be empty as they're ambiguous with `try <EXPR> {} catch {}`
allow_empty = false
),
Struct(
name = NamedArgument,
fields = (
Expand Down

0 comments on commit 5e37ea0

Please sign in to comment.