|
19 | 19 |
|
20 | 20 | parser grammar VBAParser; |
21 | 21 |
|
22 | | -options { tokenVocab = VBALexer; } |
23 | | - |
24 | | -@header { using System.Text.RegularExpressions; } |
| 22 | +options { |
| 23 | + tokenVocab = VBALexer; |
| 24 | + superClass = VBABaseParser; |
| 25 | + contextSuperClass = VBABaseParserRuleContext; |
| 26 | + } |
25 | 27 |
|
26 | 28 | startRule : module EOF; |
27 | 29 |
|
@@ -321,14 +323,14 @@ defType : |
321 | 323 | // singleLetter must appear at the end to prevent premature bailout |
322 | 324 | letterSpec : universalLetterRange | letterRange | singleLetter; |
323 | 325 |
|
324 | | -singleLetter : {_input.Lt(1).Text.Length == 1 && Regex.Match(_input.Lt(1).Text, @"[a-zA-Z]").Success}? IDENTIFIER; |
| 326 | +singleLetter : {MatchesRegex(TextOf(TokenAtRelativePosition(1)),"^[a-zA-Z]$")}? IDENTIFIER; |
325 | 327 |
|
326 | 328 | // We make a separate universalLetterRange rule because it is treated specially in VBA. This makes it easy for users of the parser |
327 | 329 | // to identify this case. Quoting MS VBAL: |
328 | 330 | // "A <universal-letter-range> defines a single implicit declared type for every <IDENTIFIER> within |
329 | 331 | // a module, even those with a first character that would otherwise fall outside this range if it was |
330 | 332 | // interpreted as a <letter-range> from A-Z."
331 | | -universalLetterRange : {_input.Lt(1).Text.Equals("A") && _input.Lt(3).Text.Equals("Z")}? IDENTIFIER MINUS IDENTIFIER; |
| 333 | +universalLetterRange : {EqualsString(TextOf(TokenAtRelativePosition(1)),"A") && EqualsString(TextOf(TokenAtRelativePosition(3)),"Z")}? IDENTIFIER MINUS IDENTIFIER; |
332 | 334 |
|
333 | 335 | letterRange : singleLetter MINUS singleLetter; |
334 | 336 |
|
@@ -571,22 +573,22 @@ circleSpecialForm : (expression whiteSpace? DOT whiteSpace?)? CIRCLE whiteSpace |
571 | 573 | scaleSpecialForm : (expression whiteSpace? DOT whiteSpace?)? SCALE whiteSpace tuple whiteSpace? MINUS whiteSpace? tuple; |
572 | 574 | pSetSpecialForm : (expression whiteSpace? DOT whiteSpace?)? PSET (whiteSpace STEP)? whiteSpace? tuple whiteSpace? (COMMA whiteSpace? expression)?; |
573 | 575 | tuple : LPAREN whiteSpace? expression whiteSpace? COMMA whiteSpace? expression whiteSpace? RPAREN; |
574 | | -lineSpecialFormOption : {_input.Lt(1).Text.ToLower().Equals("b") || _input.Lt(1).Text.ToLower().Equals("bf")}? unrestrictedIdentifier; |
| 576 | +lineSpecialFormOption : {EqualsStringIgnoringCase(TextOf(TokenAtRelativePosition(1)),"b","bf")}? unrestrictedIdentifier; |
575 | 577 |
|
576 | 578 | subscripts : subscript (whiteSpace? COMMA whiteSpace? subscript)*; |
577 | 579 |
|
578 | 580 | subscript : (expression whiteSpace TO whiteSpace)? expression; |
579 | 581 |
|
580 | 582 | unrestrictedIdentifier : identifier | statementKeyword | markerKeyword; |
581 | | -legalLabelIdentifier : { !(new[]{DOEVENTS,END,CLOSE,ELSE,LOOP,NEXT,RANDOMIZE,REM,RESUME,RETURN,STOP,WEND}).Contains(_input.La(1))}? identifier | markerKeyword; |
| 583 | +legalLabelIdentifier : { !IsTokenType(TokenTypeAtRelativePosition(1),DOEVENTS,END,CLOSE,ELSE,LOOP,NEXT,RANDOMIZE,REM,RESUME,RETURN,STOP,WEND)}? identifier | markerKeyword; |
582 | 584 | //The predicate in the following rule has been introduced to lessen the problem that VBA uses the same characters used as type hints in other syntactical constructs, |
583 | 585 | //e.g. in the bang notation (see withDictionaryAccessExpr). Generally, it is not legal to have an identifier or opening bracket follow immediately after a type hint. |
584 | 586 | //The first part of the predicate tries to exclude these two situations. Unfortunately, predicates have to be at the start of a rule. So, an assumption about the number |
585 | 587 | //of tokens in the identifier is made. All untypedIdentifiers that are not foreignNames consist of exactly one token, and a typedIdentifier is an untyped one followed by a typeHint,
586 | 588 | //again a single token. So, in the majority of situations, the third token is the token following the potential type hint. |
587 | 589 | //For foreignNames, no assumption can be made because they consist of a pair of brackets containing arbitrarily many tokens. |
588 | 590 | //That is why the second part of the predicate looks at the first character in order to determine whether the identifier is a foreignName. |
589 | | -identifier : {_input.La(3) != IDENTIFIER && _input.La(3) != L_SQUARE_BRACKET || _input.La(1) == L_SQUARE_BRACKET}? typedIdentifier |
| 591 | +identifier : {!IsTokenType(TokenTypeAtRelativePosition(3),IDENTIFIER,L_SQUARE_BRACKET) || IsTokenType(TokenTypeAtRelativePosition(1),L_SQUARE_BRACKET)}? typedIdentifier |
590 | 592 | | untypedIdentifier; |
591 | 593 | untypedIdentifier : identifierValue; |
592 | 594 | typedIdentifier : untypedIdentifier typeHint; |
@@ -614,7 +616,7 @@ complexType : |
614 | 616 | fieldLength : MULT whiteSpace? (numberLiteral | identifierValue); |
615 | 617 |
|
616 | 618 | //Statement labels can only appear at the start of a line. |
617 | | -statementLabelDefinition : {_input.La(-1) == NEWLINE || _input.La(-1) == LINE_CONTINUATION}? (combinedLabels | identifierStatementLabel | standaloneLineNumberLabel); |
| 619 | +statementLabelDefinition : {IsTokenType(TokenTypeAtRelativePosition(-1),NEWLINE,LINE_CONTINUATION)}? (combinedLabels | identifierStatementLabel | standaloneLineNumberLabel); |
618 | 620 | identifierStatementLabel : legalLabelIdentifier whiteSpace? COLON; |
619 | 621 | standaloneLineNumberLabel : |
620 | 622 | lineNumberLabel whiteSpace? COLON |
|
0 commit comments