rubberduck-vba
diff --git a/‎.gitignore‎
Lines changed: 3 additions & 0 deletions b/‎.gitignore‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎Rubberduck.Parsing/Grammar/VBABaseLexer.cs‎
Lines changed: 34 additions & 0 deletions b/‎Rubberduck.Parsing/Grammar/VBABaseLexer.cs‎
Lines changed: 34 additions & 0 deletions
diff --git a/‎Rubberduck.Parsing/Grammar/VBABaseParser.cs‎
Lines changed: 79 additions & 0 deletions b/‎Rubberduck.Parsing/Grammar/VBABaseParser.cs‎
Lines changed: 79 additions & 0 deletions
diff --git a/‎Rubberduck.Parsing/Grammar/VBABaseParserRuleContext.cs‎
Lines changed: 14 additions & 0 deletions b/‎Rubberduck.Parsing/Grammar/VBABaseParserRuleContext.cs‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎Rubberduck.Parsing/Grammar/VBALexer.g4‎
Lines changed: 5 additions & 5 deletions b/‎Rubberduck.Parsing/Grammar/VBALexer.g4‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎Rubberduck.Parsing/Grammar/VBAParser.g4‎
Lines changed: 11 additions & 9 deletions b/‎Rubberduck.Parsing/Grammar/VBAParser.g4‎
Lines changed: 11 additions & 9 deletions
diff --git a/‎appveyor.yml‎
Lines changed: 3 additions & 1 deletion b/‎appveyor.yml‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎development/java/Rubberduck.Parsing/Grammar/.gitignore‎
Lines changed: 39 additions & 0 deletions b/‎development/java/Rubberduck.Parsing/Grammar/.gitignore‎
Lines changed: 39 additions & 0 deletions
diff --git a/‎development/java/Rubberduck.Parsing/Grammar/build.gradle‎
Lines changed: 56 additions & 0 deletions b/‎development/java/Rubberduck.Parsing/Grammar/build.gradle‎
Lines changed: 56 additions & 0 deletions
diff --git a/‎development/java/Rubberduck.Parsing/Grammar/gradle/wrapper/gradle-wrapper.jar‎
52.1 KB b/‎development/java/Rubberduck.Parsing/Grammar/gradle/wrapper/gradle-wrapper.jar‎
52.1 KB
@@ -183,3 +183,6 @@ CodeGraphData/
 /Rubberduck.Deployment/Properties/launchSettings.json
 /Rubberduck.Deployment/Rubberduck.API.idl
 /Rubberduck.Deployment/Rubberduck.idl
+
+#Gradle 
+/.gradle/
@@ -0,0 +1,34 @@
+using Antlr4.Runtime;
+
+namespace Rubberduck.Parsing.Grammar
+{
+    /// <summary>
+    /// Base class for the generated VBA lexer. It provides the helper methods that the
+    /// semantic predicates in VBALexer.g4 call instead of touching runtime members
+    /// such as <c>_input</c> directly.
+    /// </summary>
+    public abstract class VBABaseLexer : Lexer
+    {
+        public VBABaseLexer(ICharStream input) : base(input) { }
+
+        #region Semantic predicate helper methods
+        /// <summary>
+        /// Returns what the character stream reports for the relative position <paramref name="i"/>.
+        /// </summary>
+        /// <param name="i">Relative position, passed unchanged to the stream's <c>La</c> method.</param>
+        /// <returns>The integer value returned by the stream's <c>La</c> method.</returns>
+        protected int CharAtRelativePosition(int i)
+        {
+            return _input.La(i);
+        }
+
+        /// <summary>
+        /// Tells whether the lookahead value <paramref name="actual"/> is the character <paramref name="expected"/>.
+        /// </summary>
+        /// <param name="actual">Lookahead value, e.g. as obtained from <see cref="CharAtRelativePosition"/>.</param>
+        /// <param name="expected">The character to compare against.</param>
+        /// <returns><c>true</c> if both denote the same character value; otherwise <c>false</c>.</returns>
+        protected bool IsChar(int actual, char expected)
+        {
+            // Compare as integers. Casting 'actual' to char would truncate values that do not
+            // fit into 16 bits (negative values, code points above U+FFFF) and could report a
+            // match for a character that is not actually there.
+            return actual == expected;
+        }
+
+        /// <summary>
+        /// Tells whether the lookahead value <paramref name="actual"/> is one of the characters in
+        /// <paramref name="expectedOptions"/>.
+        /// </summary>
+        /// <param name="actual">Lookahead value, e.g. as obtained from <see cref="CharAtRelativePosition"/>.</param>
+        /// <param name="expectedOptions">The characters that count as a match.</param>
+        /// <returns><c>true</c> if any option equals <paramref name="actual"/>; <c>false</c> otherwise (also for an empty list).</returns>
+        protected bool IsChar(int actual, params char[] expectedOptions)
+        {
+            foreach (char expected in expectedOptions)
+            {
+                // Integer comparison for the same reason as in the single-character overload.
+                if (actual == expected)
+                {
+                    return true;
+                }
+            }
+            return false;
+        }
+        #endregion
+    }
+}
@@ -0,0 +1,79 @@
+using Antlr4.Runtime;
+using System;
+using System.Text.RegularExpressions;
+
+namespace Rubberduck.Parsing.Grammar
+{
+    /// <summary>
+    /// Base class for the generated VBA parser. It bundles the helper methods that the
+    /// semantic predicates in VBAParser.g4 call.
+    /// </summary>
+    public abstract class VBABaseParser : Parser
+    {
+        public VBABaseParser(ITokenStream input) : base(input) { }
+
+        #region Semantic predicate helper methods
+        /// <summary>Returns the value the token stream's <c>La</c> method reports for relative position <paramref name="i"/>.</summary>
+        protected int TokenTypeAtRelativePosition(int i)
+        {
+            int tokenType = _input.La(i);
+            return tokenType;
+        }
+
+        /// <summary>Returns the token the token stream's <c>Lt</c> method reports for relative position <paramref name="i"/>.</summary>
+        protected IToken TokenAtRelativePosition(int i)
+        {
+            IToken token = _input.Lt(i);
+            return token;
+        }
+
+        /// <summary>Returns the <c>Text</c> of <paramref name="token"/>.</summary>
+        protected string TextOf(IToken token)
+        {
+            string text = token.Text;
+            return text;
+        }
+
+        /// <summary>Tells whether the regular expression <paramref name="pattern"/> finds a match in <paramref name="text"/>.</summary>
+        protected bool MatchesRegex(string text, string pattern)
+        {
+            return Regex.IsMatch(text, pattern);
+        }
+
+        /// <summary>Ordinal, case-insensitive comparison of <paramref name="actual"/> with <paramref name="expected"/>.</summary>
+        protected bool EqualsStringIgnoringCase(string actual, string expected)
+        {
+            const StringComparison comparison = StringComparison.OrdinalIgnoreCase;
+            return actual.Equals(expected, comparison);
+        }
+
+        /// <summary>
+        /// Tells whether <paramref name="actual"/> equals at least one entry of <paramref name="expectedOptions"/>,
+        /// using an ordinal, case-insensitive comparison.
+        /// </summary>
+        protected bool EqualsStringIgnoringCase(string actual, params string[] expectedOptions)
+        {
+            return EqualsAny(actual, expectedOptions, StringComparison.OrdinalIgnoreCase);
+        }
+
+        /// <summary>Ordinal, case-sensitive comparison of <paramref name="actual"/> with <paramref name="expected"/>.</summary>
+        protected bool EqualsString(string actual, string expected)
+        {
+            const StringComparison comparison = StringComparison.Ordinal;
+            return actual.Equals(expected, comparison);
+        }
+
+        /// <summary>
+        /// Tells whether <paramref name="actual"/> equals at least one entry of <paramref name="expectedOptions"/>,
+        /// using an ordinal, case-sensitive comparison.
+        /// </summary>
+        protected bool EqualsString(string actual, params string[] expectedOptions)
+        {
+            return EqualsAny(actual, expectedOptions, StringComparison.Ordinal);
+        }
+
+        /// <summary>Tells whether the token type <paramref name="actual"/> occurs in <paramref name="expectedOptions"/>.</summary>
+        protected bool IsTokenType(int actual, params int[] expectedOptions)
+        {
+            for (int index = 0; index < expectedOptions.Length; index++)
+            {
+                if (expectedOptions[index] == actual)
+                {
+                    return true;
+                }
+            }
+            return false;
+        }
+
+        // Shared loop behind the multi-option string comparisons: true as soon as one candidate equals 'actual'.
+        private static bool EqualsAny(string actual, string[] candidates, StringComparison comparison)
+        {
+            for (int index = 0; index < candidates.Length; index++)
+            {
+                if (actual.Equals(candidates[index], comparison))
+                {
+                    return true;
+                }
+            }
+            return false;
+        }
+        #endregion
+    }
+}
@@ -0,0 +1,14 @@
+using Antlr4.Runtime;
+using System;
+using System.Text.RegularExpressions;
+
+namespace Rubberduck.Parsing.Grammar
+{
+    /// <summary>
+    /// Currently an empty extension point: it adds nothing to <see cref="ParserRuleContext"/> and only exists so that
+    /// other languages/implementations can define a custom contextSuperclass without having to change the grammar.
+    /// </summary>
+    public abstract class VBABaseParserRuleContext : ParserRuleContext
+    {
+        public VBABaseParserRuleContext()
+        {
+        }
+
+        public VBABaseParserRuleContext(ParserRuleContext parent, int invokingStateNumber)
+            : base(parent, invokingStateNumber)
+        {
+        }
+    }
+}
@@ -17,6 +17,10 @@
 
 lexer grammar VBALexer;
 
+// superClass: the generated lexer derives from VBABaseLexer, which provides the helper methods
+// (CharAtRelativePosition, IsChar) called from the semantic predicates in this grammar.
+// contextSuperClass: names the rule context base class, kept identical to the setting in VBAParser.g4.
+options {
+    superClass = VBABaseLexer;
+    contextSuperClass = VBABaseParserRuleContext;
+}
 
 ABS : A B S;
 ANY : A N Y;
@@ -305,11 +309,7 @@ IDENTIFIER :  ~[[\](){}\r\n\t.,'"|!@#$%^&*\-+:=; 0-9-/\\-] ~[[\](){}\r\n\t.,'"|!
 LINE_CONTINUATION : [ \t]+ UNDERSCORE [ \t]* '\r'? '\n' WS_NOT_FOLLOWED_BY_LINE_CONTINUATION*;
 // The following rule is needed in order to capture hex literals without format prefixes which start with a digit. Needed for VBForm resources.
 BARE_HEX_LITERAL : [0-9] [0-9a-fA-F]*; 
-fragment WS_NOT_FOLLOWED_BY_LINE_CONTINUATION : [ \t] {(char)_input.La(1) != '_' 
-                                                          || ((char)_input.La(2) != '\r' 
-                                                              && (char)_input.La(2) != '\n' 
-                                                              && (char)_input.La(2) != '\t' 
-                                                              && (char)_input.La(2) != ' ')}?;
+// A single space or tab. The predicate rejects it when the next character is '_' and the one after that
+// is '\r', '\n', '\t' or ' ' (presumably the start of a LINE_CONTINUATION - see the rule above).
+fragment WS_NOT_FOLLOWED_BY_LINE_CONTINUATION : [ \t] {!IsChar(CharAtRelativePosition(1),'_') || !IsChar(CharAtRelativePosition(2),'\r','\n','\t',' ')}?;
 fragment LETTER : [a-zA-Z_äöüÄÖÜ];
 fragment DIGIT : [0-9];
 fragment LETTERORDIGIT : [a-zA-Z0-9_äöüÄÖÜ];
 
@@ -19,9 +19,11 @@
 
 parser grammar VBAParser;
 
-options { tokenVocab = VBALexer; }
-
-@header { using System.Text.RegularExpressions; }
+// tokenVocab: token types are taken from VBALexer.
+// superClass: the generated parser derives from VBABaseParser, which provides the helper methods
+// (TokenAtRelativePosition, TextOf, IsTokenType, ...) called from the semantic predicates in this grammar.
+// contextSuperClass: generated rule contexts derive from VBABaseParserRuleContext.
+options {
+    tokenVocab = VBALexer;
+    superClass = VBABaseParser;
+    contextSuperClass = VBABaseParserRuleContext;
+ }
 
 startRule : module EOF;
 
@@ -321,14 +323,14 @@ defType :
 // singleLetter must appear at the end to prevent premature bailout
 letterSpec : universalLetterRange | letterRange | singleLetter;
 
-singleLetter : {_input.Lt(1).Text.Length == 1 && Regex.Match(_input.Lt(1).Text, @"[a-zA-Z]").Success}? IDENTIFIER;
+// An IDENTIFIER is accepted here only if the text of the next token is a single letter a-z or A-Z.
+singleLetter : {MatchesRegex(TextOf(TokenAtRelativePosition(1)),"^[a-zA-Z]$")}? IDENTIFIER;
 
 // We make a separate universalLetterRange rule because it is treated specially in VBA. This makes it easy for users of the parser
 // to identify this case. Quoting MS VBAL:
 // "A <universal-letter-range> defines a single implicit declared type for every <IDENTIFIER> within 
 // a module, even those with a first character that would otherwise fall outside this range if it was 
 // interpreted as a <letter-range> from A-Z."
-universalLetterRange : {_input.Lt(1).Text.Equals("A") && _input.Lt(3).Text.Equals("Z")}? IDENTIFIER MINUS IDENTIFIER;
+universalLetterRange : {EqualsString(TextOf(TokenAtRelativePosition(1)),"A") && EqualsString(TextOf(TokenAtRelativePosition(3)),"Z")}? IDENTIFIER MINUS IDENTIFIER;
 
 letterRange : singleLetter MINUS singleLetter;
 
@@ -571,22 +573,22 @@ circleSpecialForm : (expression whiteSpace? DOT whiteSpace?)? CIRCLE whiteSpace
 scaleSpecialForm : (expression whiteSpace? DOT whiteSpace?)? SCALE whiteSpace tuple whiteSpace? MINUS whiteSpace? tuple;
 pSetSpecialForm : (expression whiteSpace? DOT whiteSpace?)? PSET (whiteSpace STEP)? whiteSpace? tuple whiteSpace? (COMMA whiteSpace? expression)?;
 tuple : LPAREN whiteSpace? expression whiteSpace? COMMA whiteSpace? expression whiteSpace? RPAREN;
-lineSpecialFormOption : {_input.Lt(1).Text.ToLower().Equals("b") || _input.Lt(1).Text.ToLower().Equals("bf")}? unrestrictedIdentifier;
+// Accepted only if the text of the next token is "b" or "bf", compared case-insensitively.
+lineSpecialFormOption : {EqualsStringIgnoringCase(TextOf(TokenAtRelativePosition(1)),"b","bf")}? unrestrictedIdentifier;
 
 subscripts : subscript (whiteSpace? COMMA whiteSpace? subscript)*;
 
 subscript : (expression whiteSpace TO whiteSpace)? expression;
 
 unrestrictedIdentifier : identifier | statementKeyword | markerKeyword;
-legalLabelIdentifier : { !(new[]{DOEVENTS,END,CLOSE,ELSE,LOOP,NEXT,RANDOMIZE,REM,RESUME,RETURN,STOP,WEND}).Contains(_input.La(1))}? identifier | markerKeyword;
+// The predicate guards the first alternative only: an identifier is accepted as a label
+// unless the next token has one of the listed keyword token types.
+legalLabelIdentifier : { !IsTokenType(TokenTypeAtRelativePosition(1),DOEVENTS,END,CLOSE,ELSE,LOOP,NEXT,RANDOMIZE,REM,RESUME,RETURN,STOP,WEND)}? identifier | markerKeyword;
 //The predicate in the following rule has been introduced to lessen the problem that VBA uses the same characters used as type hints in other syntactical constructs, 
 //e.g. in the bang notation (see withDictionaryAccessExpr). Generally, it is not legal to have an identifier or opening bracket follow immediately after a type hint.
 //The first part of the predicate tries to exclude these two situations. Unfortunately, predicates have to be at the start of a rule. So, an assumption about the number 
 //of tokens in the identifier is made. All untypedIdentifiers that are not foreignNames consist of exactly one token, and a typedIdentifier is an untyped one followed by a typeHint,
 //again a single token. So, in the majority of situations, the third token is the token following the potential type hint. 
 //For foreignNames, no assumption can be made because they consist of a pair of brackets containing arbitrarily many tokens. 
 //That is why the second part of the predicate looks at the first character in order to determine whether the identifier is a foreignName. 
-identifier : {_input.La(3) != IDENTIFIER && _input.La(3) != L_SQUARE_BRACKET || _input.La(1) == L_SQUARE_BRACKET}? typedIdentifier 
+identifier : {!IsTokenType(TokenTypeAtRelativePosition(3),IDENTIFIER,L_SQUARE_BRACKET) || IsTokenType(TokenTypeAtRelativePosition(1),L_SQUARE_BRACKET)}? typedIdentifier
              | untypedIdentifier;
 untypedIdentifier : identifierValue;
 typedIdentifier : untypedIdentifier typeHint;
@@ -614,7 +616,7 @@ complexType :
 fieldLength : MULT whiteSpace? (numberLiteral | identifierValue);
 
 //Statement labels can only appear at the start of a line.
-statementLabelDefinition : {_input.La(-1) == NEWLINE || _input.La(-1) == LINE_CONTINUATION}? (combinedLabels | identifierStatementLabel | standaloneLineNumberLabel);
+statementLabelDefinition : {IsTokenType(TokenTypeAtRelativePosition(-1),NEWLINE,LINE_CONTINUATION)}? (combinedLabels | identifierStatementLabel | standaloneLineNumberLabel);
 identifierStatementLabel : legalLabelIdentifier whiteSpace? COLON;
 standaloneLineNumberLabel : 
     lineNumberLabel whiteSpace? COLON
 
@@ -37,9 +37,10 @@ platform: Any CPU
 # cache the nuget packages unless something changed there
 cache:
   - packages/ -> **/packages.config
+  - '%USERPROFILE%/.gradle/wrapper/dists'
 
 install:
-  set PATH=C:\Program Files (x86)\MSBuild\15.0\Bin;%PATH%
+  set PATH=C:\Program Files (x86)\MSBuild\15.0\Bin;C:\Program Files (x86)\Java\jdk1.8.0;%PATH%
 
 # patch version specifiers in the base project
 dotnet_csproj:
@@ -51,6 +52,7 @@ dotnet_csproj:
 
 
 before_build:
+  - development/java/Rubberduck.Parsing/Grammar/gradlew.bat -p development/java/Rubberduck.Parsing/Grammar clean build
   - cinst innosetup
   - cinst codecov
   - cinst opencover.portable
 
@@ -0,0 +1,39 @@
+
+# Created by https://www.gitignore.io/api/git,gradle
+# Edit at https://www.gitignore.io/?templates=git,gradle
+
+### Git ###
+# Created by git for backups. To disable backups in Git:
+# $ git config --global mergetool.keepBackup false
+*.orig
+
+# Created by git when using merge tools for conflicts
+*.BACKUP.*
+*.BASE.*
+*.LOCAL.*
+*.REMOTE.*
+*_BACKUP_*.txt
+*_BASE_*.txt
+*_LOCAL_*.txt
+*_REMOTE_*.txt
+
+### Gradle ###
+.gradle
+build/
+
+# Ignore Gradle GUI config
+gradle-app.setting
+
+# Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored)
+!gradle-wrapper.jar
+
+# Cache of project
+.gradletasknamecache
+
+# # Work around https://youtrack.jetbrains.com/issue/IDEA-116898
+# gradle/wrapper/gradle-wrapper.properties
+
+### Gradle Patch ###
+**/build/
+
+# End of https://www.gitignore.io/api/git,gradle
@@ -0,0 +1,56 @@
+// Gradle build that copies the shared VBA grammar files, runs the ANTLR tool on them
+// and compiles the generated Java sources.
+buildscript {
+    // Single ANTLR version used for both the code generator and the runtime dependency.
+    project.ext.antlrVersion = '4.7.2'
+
+    repositories {
+        mavenCentral()
+    }
+
+    dependencies {
+        // The ANTLR tool has to be on the build script classpath because generateGrammarSources runs org.antlr.v4.Tool from it.
+        classpath group: 'org.antlr', name: 'antlr4', version: "${project.ext.antlrVersion}"
+    }
+}
+
+plugins {
+    id 'java'
+}
+
+// Version comes from the APPVEYOR_VERSION environment variable; 'snapshot' is used when it is not set.
+def envVersion = System.getenv("APPVEYOR_VERSION")
+
+group 'com.rubberduckvba.rubberduck.parsing'
+version envVersion==null?'snapshot':envVersion
+
+repositories {
+    mavenCentral()
+}
+
+dependencies {
+    compile group: 'org.antlr', name: 'antlr4-runtime', version: "${project.ext.antlrVersion}"
+}
+
+// Where the ANTLR tool writes the generated Java sources.
+def grammarCodeGenDest = "${projectDir}/src/main/gen"
+// Grammar files are taken from the Rubberduck.Parsing/Grammar folder, four levels above this project.
+def grammarSource = "${projectDir}/../../../../Rubberduck.Parsing/Grammar/"
+// Local copy of the grammar files that the ANTLR tool is run on.
+def grammarDest = "${projectDir}/src/main/antlr/com/rubberduckvba/rubberduck/parsing/grammar"
+
+sourceSets {
+    main {
+        java {
+            // Compile the generated sources together with the regular main sources.
+            srcDir "${grammarCodeGenDest}"
+        }
+    }
+}
+
+// Copies the two grammar files into this project.
+task copyGrammarFiles(type: Copy) {
+    from grammarSource
+    into grammarDest
+    include "VBALexer.g4"
+    include "VBAParser.g4"
+}
+
+// Runs the ANTLR tool on the copied grammar files (with visitor generation and the given Java package).
+task generateGrammarSources(type: JavaExec) {
+    main 'org.antlr.v4.Tool'
+    classpath = buildscript.configurations.classpath
+    args "-o", "${grammarCodeGenDest}", "-visitor", "-package", "com.rubberduckvba.rubberduck.parsing.grammar", "${grammarDest}/VBALexer.g4", "${grammarDest}/VBAParser.g4"
+}
+
+generateGrammarSources.dependsOn copyGrammarFiles
+// The generated sources belong to the main source set, so they must exist before compilation starts.
+// A dependency from 'build' alone does not order the generator before compileJava.
+compileJava.dependsOn generateGrammarSources
+build.dependsOn generateGrammarSources