Skip to content

Commit

Permalink
Fix parsing ambiguous call options (#862)
Browse files Browse the repository at this point in the history
Based on #853 

Closes #850
Closes #861

---------

Co-authored-by: OmarTawfik <15987992+OmarTawfik@users.noreply.github.com>
  • Loading branch information
Xanewok and OmarTawfik committed Mar 8, 2024
1 parent a9bd8da commit 5e37ea0
Show file tree
Hide file tree
Showing 66 changed files with 1,249 additions and 930 deletions.
5 changes: 5 additions & 0 deletions .changeset/shy-pets-attack.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@nomicfoundation/slang": patch
---

Allow call options as a postfix expression
13 changes: 7 additions & 6 deletions crates/codegen/grammar/src/constructor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ use codegen_language_definition::model::{self, FieldsErrorRecovery, Identifier,
use indexmap::IndexMap;

use crate::{
Grammar, GrammarElement, KeywordScannerDefinition, KeywordScannerDefinitionNode,
KeywordScannerDefinitionVersionedNode, Labeled, ParserDefinition, ParserDefinitionNode,
PrecedenceOperatorModel, PrecedenceParserDefinition, PrecedenceParserDefinitionNode,
ScannerDefinition, ScannerDefinitionNode, TriviaParserDefinition, VersionQuality,
VersionQualityRange,
DelimitedRecoveryTokenThreshold, Grammar, GrammarElement, KeywordScannerDefinition,
KeywordScannerDefinitionNode, KeywordScannerDefinitionVersionedNode, Labeled, ParserDefinition,
ParserDefinitionNode, PrecedenceOperatorModel, PrecedenceParserDefinition,
PrecedenceParserDefinitionNode, ScannerDefinition, ScannerDefinitionNode,
TriviaParserDefinition, VersionQuality, VersionQualityRange,
};

/// Materializes the DSL v2 model ([`model::Language`]) into [`Grammar`].
Expand Down Expand Up @@ -610,7 +610,8 @@ fn resolve_sequence_like(
let open = delims.next().unwrap();
let close = delims.next().unwrap();

ParserDefinitionNode::DelimitedBy(open, Box::new(delimited_body), close)
let threshold = DelimitedRecoveryTokenThreshold::from(delimiters);
ParserDefinitionNode::DelimitedBy(open, Box::new(delimited_body), close, threshold)
};
// Replace with a new delimited node
fields.insert(
Expand Down
30 changes: 28 additions & 2 deletions crates/codegen/grammar/src/parser_definition.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use std::fmt::Debug;
use std::rc::Rc;

use codegen_language_definition::model;

use crate::visitor::{GrammarVisitor, Visitable};
use crate::{
KeywordScannerDefinitionRef, PrecedenceParserDefinitionRef, ScannerDefinitionRef,
Expand Down Expand Up @@ -53,6 +55,25 @@ impl Visitable for TriviaParserDefinitionRef {
}
}

/// How many tokens have to be matched to trigger the error recovery.
/// For ambiguous syntaxes this needs to be set to at least N, where N
/// is the token lookahead required to disambiguate the syntax.
///
/// By default, we assume no lookahead (0) is required to recover from
/// unrecognized body between delimiters, so it's always triggered.
#[derive(Clone, Debug, Default)]
pub struct DelimitedRecoveryTokenThreshold(pub u8);

impl From<model::FieldDelimiters> for DelimitedRecoveryTokenThreshold {
    /// Materializes the DSL-level delimiter configuration into the
    /// grammar-level recovery threshold, falling back to the default (0,
    /// i.e. always recover) when the DSL does not specify one.
    fn from(delimiters: model::FieldDelimiters) -> Self {
        Self(
            delimiters
                .tokens_matched_acceptance_threshold
                .unwrap_or(Self::default().0),
        )
    }
}

#[derive(Clone, Debug)]
pub enum ParserDefinitionNode {
Versioned(Box<Self>, Vec<VersionQualityRange>),
Expand All @@ -66,7 +87,12 @@ pub enum ParserDefinitionNode {
TriviaParserDefinition(TriviaParserDefinitionRef),
ParserDefinition(ParserDefinitionRef),
PrecedenceParserDefinition(PrecedenceParserDefinitionRef),
DelimitedBy(Labeled<Box<Self>>, Box<Self>, Labeled<Box<Self>>),
DelimitedBy(
Labeled<Box<Self>>,
Box<Self>,
Labeled<Box<Self>>,
DelimitedRecoveryTokenThreshold,
),
SeparatedBy(Labeled<Box<Self>>, Labeled<Box<Self>>),
TerminatedBy(Box<Self>, Labeled<Box<Self>>),
}
Expand Down Expand Up @@ -115,7 +141,7 @@ impl Visitable for ParserDefinitionNode {
}
}

Self::DelimitedBy(open, body, close) => {
Self::DelimitedBy(open, body, close, ..) => {
open.accept_visitor(visitor);
body.accept_visitor(visitor);
close.accept_visitor(visitor);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,15 @@ impl ParseInputTokens for usize {
}
}

impl ParseInputTokens for u8 {
    /// Parses the next integer literal from the input stream and narrows
    /// it to `u8`; `base10_parse` reports an error for out-of-range values.
    fn parse_value(input: ParseStream<'_>, _: &mut ErrorsCollection) -> Result<Self> {
        let parsed = ParseHelpers::syn::<syn::LitInt>(input)?;

        Ok(parsed.base10_parse::<u8>()?)
    }
}

impl<T: ParseInputTokens> ParseInputTokens for Vec<T> {
fn parse_value(input: ParseStream<'_>, errors: &mut ErrorsCollection) -> Result<Self> {
Ok(ParseHelpers::sequence(input, errors))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,16 @@ impl WriteOutputTokens for usize {
}
}

impl WriteOutputTokens for u8 {
    /// Emits this `u8` as a suffixed literal (e.g. `2u8`) followed by
    /// `.into()`, letting the generated code convert to the target type.
    fn write_output_tokens(&self) -> TokenStream {
        let literal = Literal::u8_suffixed(*self);

        quote! { #literal.into() }
    }
}

impl<T: WriteOutputTokens> WriteOutputTokens for Vec<T> {
fn write_output_tokens(&self) -> TokenStream {
let items = self.iter().map(T::write_output_tokens);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@ pub struct FieldsErrorRecovery {
pub struct FieldDelimiters {
/// Grammar token that opens the delimited group (e.g. `open_brace`).
pub open: Identifier,
/// Grammar token that closes the delimited group (e.g. `close_brace`).
pub close: Identifier,
/// How many tokens have to be matched to trigger the error recovery.
/// For ambiguous syntaxes this needs to be set to at least N, where N
/// is the token lookahead required to disambiguate the syntax.
///
/// By default, we assume no lookahead (0) is required to recover from
/// unrecognized body between delimiters, so it's always triggered.
pub tokens_matched_acceptance_threshold: Option<u8>,
}

#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ fn get_spanned_type(input: Type) -> Type {
}

// External types should also be wrapped in 'Spanned<T>':
"bool" | "char" | "PathBuf" | "String" | "Version" => {
"bool" | "char" | "PathBuf" | "String" | "u8" | "Version" => {
parse_quote! {
crate::internals::Spanned<#input>
}
Expand Down
8 changes: 5 additions & 3 deletions crates/codegen/parser/generator/src/parser_definition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode {
quote! { self.#function_name(input) }
}

Self::DelimitedBy(open, body, close) => {
Self::DelimitedBy(open, body, close, threshold) => {
let open_label = format_ident!("{}", open.label.to_pascal_case());
let close_label = format_ident!("{}", close.label.to_pascal_case());
let [open_delim, close_delim] = match (open.as_ref(), close.as_ref()) {
Expand All @@ -201,6 +201,7 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode {
) => [open, close].map(|scanner| format_ident!("{}", scanner.name())),
_ => unreachable!("Only tokens are permitted as delimiters"),
};
let threshold = threshold.0;

let parser = body.to_parser_code(context_name, is_trivia);
let body_parser = body.applicable_version_quality_ranges().wrap_code(
Expand All @@ -209,7 +210,7 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode {
.recover_until_with_nested_delims::<_, #lex_ctx>(input,
self,
TokenKind::#close_delim,
RecoverFromNoMatch::Yes,
TokenAcceptanceThreshold(#threshold),
)
)?;
},
Expand Down Expand Up @@ -275,7 +276,8 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode {
.recover_until_with_nested_delims::<_, #lex_ctx>(input,
self,
TokenKind::#terminator,
RecoverFromNoMatch::No,
// Requires at least a partial match not to risk misparsing
TokenAcceptanceThreshold(1u8),
)
)?;
},
Expand Down
2 changes: 1 addition & 1 deletion crates/codegen/parser/generator/src/rust_generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ impl GrammarVisitor for RustGenerator {
}

// Collect delimiters for each context
ParserDefinitionNode::DelimitedBy(open, _, close) => {
ParserDefinitionNode::DelimitedBy(open, _, close, ..) => {
self.labels.insert(open.label.clone());
self.labels.insert(close.label.clone());

Expand Down
2 changes: 1 addition & 1 deletion crates/codegen/parser/runtime/src/parser_support/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ pub(crate) use parser_result::ParserResult;
#[allow(unused_imports)]
pub(crate) use precedence_helper::PrecedenceHelper;
#[allow(unused_imports)]
pub(crate) use recovery::RecoverFromNoMatch;
pub(crate) use recovery::TokenAcceptanceThreshold;
#[allow(unused_imports)]
pub(crate) use repetition_helper::{OneOrMoreHelper, ZeroOrMoreHelper};
#[allow(unused_imports)]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::ops::ControlFlow;

use crate::cst::{self, LabeledNode};
use crate::cst::{self, LabeledNode, Node};
use crate::kinds::{NodeLabel, RuleKind, TokenKind};
use crate::text_index::TextIndex;

Expand Down Expand Up @@ -201,6 +201,33 @@ impl IncompleteMatch {
expected_tokens,
}
}

/// Whether this prefix-matched at least `n` (non-skipped) significant tokens.
pub fn matches_at_least_n_tokens(&self, n: u8) -> bool {
    let wanted = usize::from(n);

    // Walk every node reachable from the matched prefix and count only
    // significant tokens, i.e. neither SKIPPED nor trivia. `take(wanted)`
    // short-circuits the traversal as soon as enough tokens have been
    // seen, so we never walk the whole tree unnecessarily.
    self.nodes
        .iter()
        .flat_map(|node| node.cursor_with_offset(TextIndex::ZERO))
        .filter(|node| {
            matches!(
                node,
                Node::Token(tok) if tok.kind != TokenKind::SKIPPED && !tok.kind.is_trivia()
            )
        })
        .take(wanted)
        .count()
        >= wanted
}
}

#[derive(PartialEq, Eq, Clone, Debug)]
Expand Down
33 changes: 15 additions & 18 deletions crates/codegen/parser/runtime/src/parser_support/recovery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,11 @@ use crate::parser_support::parser_result::SkippedUntil;
use crate::parser_support::ParserResult;
use crate::text_index::{TextRange, TextRangeExtensions as _};

/// An explicit parameter for the [`ParserResult::recover_until_with_nested_delims`] method.
/// How many tokens have to be matched to trigger the error recovery.
/// For ambiguous syntaxes this needs to be set to at least N, where N
/// is the token lookahead required to disambiguate the syntax.
#[derive(Clone, Copy)]
pub(crate) enum RecoverFromNoMatch {
Yes,
No,
}

impl RecoverFromNoMatch {
pub fn as_bool(self) -> bool {
matches!(self, RecoverFromNoMatch::Yes)
}
}
pub(crate) struct TokenAcceptanceThreshold(pub(crate) u8);

fn opt_parse(
input: &mut ParserContext<'_>,
Expand Down Expand Up @@ -46,7 +39,7 @@ impl ParserResult {
input: &mut ParserContext<'_>,
lexer: &L,
expected: TokenKind,
recover_from_no_match: RecoverFromNoMatch,
acceptance_threshold: TokenAcceptanceThreshold,
) -> ParserResult {
enum ParseResultKind {
Match,
Expand All @@ -57,11 +50,15 @@ impl ParserResult {
let before_recovery = input.position();

let (mut nodes, mut expected_tokens, result_kind) = match self {
ParserResult::IncompleteMatch(result) => (
result.nodes,
result.expected_tokens,
ParseResultKind::Incomplete,
),
ParserResult::IncompleteMatch(result)
if result.matches_at_least_n_tokens(acceptance_threshold.0) =>
{
(
result.nodes,
result.expected_tokens,
ParseResultKind::Incomplete,
)
}
ParserResult::Match(result)
if lexer
.peek_token_with_trivia::<LexCtx>(input)
Expand All @@ -70,7 +67,7 @@ impl ParserResult {
{
(result.nodes, result.expected_tokens, ParseResultKind::Match)
}
ParserResult::NoMatch(result) if recover_from_no_match.as_bool() => {
ParserResult::NoMatch(result) if acceptance_threshold.0 == 0 => {
(vec![], result.expected_tokens, ParseResultKind::NoMatch)
}
// No need to recover, so just return as-is.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use crate::napi_interface::parse_output::ParseOutput as NAPIParseOutput;
use crate::parse_output::ParseOutput;
use crate::parser_support::{
ChoiceHelper, OneOrMoreHelper, OptionalHelper, ParserContext, ParserFunction, ParserResult,
PrecedenceHelper, RecoverFromNoMatch, SeparatedHelper, SequenceHelper, ZeroOrMoreHelper,
PrecedenceHelper, SeparatedHelper, SequenceHelper, TokenAcceptanceThreshold, ZeroOrMoreHelper,
};

#[derive(Debug)]
Expand Down
54 changes: 30 additions & 24 deletions crates/solidity/inputs/language/src/definition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3378,12 +3378,29 @@ codegen_language_macros::compile!(Language(
name = FunctionCallExpression,
operators = [PrecedenceOperator(
model = Postfix,
fields = (arguments = Required(ArgumentsDeclaration))
)]
),
PrecedenceExpression(
name = CallOptionsExpression,
operators = [PrecedenceOperator(
model = Postfix,
enabled = From("0.6.2"),
error_recovery = FieldsErrorRecovery(
delimiters = FieldDelimiters(
open = open_brace,
close = close_brace,
// NOTE: Despite `CallOptions` requiring at least one element,
// we can only recover if we found at least two tokens (`ident:`)
// in the body, as this may be otherwise ambiguous with
// `try <EXPR> { func() } catch {}`.
tokens_matched_acceptance_threshold = 2
)
),
fields = (
options = Optional(
reference = FunctionCallOptions,
enabled = From("0.6.2")
),
arguments = Required(ArgumentsDeclaration)
open_brace = Required(OpenBrace),
options = Required(CallOptions),
close_brace = Required(CloseBrace)
)
)]
),
Expand Down Expand Up @@ -3456,20 +3473,6 @@ codegen_language_macros::compile!(Language(
Topic(
title = "Function Calls",
items = [
Enum(
name = FunctionCallOptions,
enabled = From("0.6.2"),
variants = [
EnumVariant(
reference = NamedArgumentGroups,
enabled = Range(from = "0.6.2", till = "0.8.0")
),
EnumVariant(
reference = NamedArgumentGroup,
enabled = From("0.8.0")
)
]
),
Enum(
name = ArgumentsDeclaration,
variants = [
Expand Down Expand Up @@ -3507,11 +3510,6 @@ codegen_language_macros::compile!(Language(
close_paren = Required(CloseParen)
)
),
Repeated(
name = NamedArgumentGroups,
reference = NamedArgumentGroup,
enabled = Range(from = "0.6.2", till = "0.8.0")
),
Struct(
name = NamedArgumentGroup,
error_recovery = FieldsErrorRecovery(
Expand All @@ -3530,6 +3528,14 @@ codegen_language_macros::compile!(Language(
separator = Comma,
allow_empty = true
),
Separated(
name = CallOptions,
reference = NamedArgument,
separator = Comma,
enabled = From("0.6.2"),
// These cannot be empty as they're ambiguous with `try <EXPR> {} catch {}`
allow_empty = false
),
Struct(
name = NamedArgument,
fields = (
Expand Down

0 comments on commit 5e37ea0

Please sign in to comment.