From d2bf2944ac51a875168ae15e8c9f4bb659b99f79 Mon Sep 17 00:00:00 2001 From: grammarware Date: Tue, 8 Mar 2011 14:38:26 +0000 Subject: [PATCH] moar Java/ANTLR grammars: 1.3 parr; 1.4 parr/cortex; 6 jiang. git-svn-id: https://slps.svn.sourceforge.net/svnroot/slps@969 ab42f6e0-554d-0410-b580-99e487e6eeb2 --- topics/grammars/java/antlr-java-1.3/Main.java | 114 + .../grammars/java/antlr-java-1.3/README.txt | 6 + topics/grammars/java/antlr-java-1.3/java.g | 1277 +++++++++ .../grammars/java/antlr-java-1.3/java.tree.g | 324 +++ topics/grammars/java/antlr-java-1.3/shiplist | 5 + .../grammars/java/antlr-java-1.3/tests/A.java | 12 + .../grammars/java/antlr-java-1.3/tests/B.java | 5 + .../grammars/java/antlr-java-1.3/tests/C.java | 25 + .../grammars/java/antlr-java-1.3/tests/D.java | 32 + .../grammars/java/antlr-java-1.3/tests/E.java | 24 + .../grammars/java/antlr-java-1.3/tests/T.java | 22 + .../grammars/java/antlr-java-1.4/README.txt | 7 + topics/grammars/java/antlr-java-1.4/javaG.g | 1364 +++++++++ topics/grammars/java/antlr-java-6/Java.g | 2507 +++++++++++++++++ topics/grammars/java/antlr-java-6/README.txt | 8 + 15 files changed, 5732 insertions(+) create mode 100644 topics/grammars/java/antlr-java-1.3/Main.java create mode 100644 topics/grammars/java/antlr-java-1.3/README.txt create mode 100644 topics/grammars/java/antlr-java-1.3/java.g create mode 100644 topics/grammars/java/antlr-java-1.3/java.tree.g create mode 100644 topics/grammars/java/antlr-java-1.3/shiplist create mode 100644 topics/grammars/java/antlr-java-1.3/tests/A.java create mode 100644 topics/grammars/java/antlr-java-1.3/tests/B.java create mode 100644 topics/grammars/java/antlr-java-1.3/tests/C.java create mode 100644 topics/grammars/java/antlr-java-1.3/tests/D.java create mode 100644 topics/grammars/java/antlr-java-1.3/tests/E.java create mode 100644 topics/grammars/java/antlr-java-1.3/tests/T.java create mode 100644 topics/grammars/java/antlr-java-1.4/README.txt create mode 100644 
topics/grammars/java/antlr-java-1.4/javaG.g
 create mode 100644 topics/grammars/java/antlr-java-6/Java.g
 create mode 100644 topics/grammars/java/antlr-java-6/README.txt

diff --git a/topics/grammars/java/antlr-java-1.3/Main.java b/topics/grammars/java/antlr-java-1.3/Main.java
new file mode 100644
index 00000000..0118ec6b
--- /dev/null
+++ b/topics/grammars/java/antlr-java-1.3/Main.java
@@ -0,0 +1,168 @@
+import java.io.*;
+import antlr.collections.AST;
+import antlr.collections.impl.*;
+import antlr.debug.misc.*;
+import antlr.*;
+import java.awt.event.*;
+
+/**
+ * Command-line driver for the Java 1.3 grammar.  Parses every ".java" file
+ * named on the command line (recursing into directories), walks each
+ * resulting AST with JavaTreeParser and, on request, shows it in a frame.
+ */
+class Main {
+
+    /** Set by "-showtree": display each AST in an ASTFrame. */
+    static boolean showTree = false;
+
+    /**
+     * Entry point.  Arguments are processed left to right, so "-showtree"
+     * only affects the files and directories that follow it.
+     *
+     * @param args files and/or directories to parse, plus "-showtree"
+     */
+    public static void main(String[] args) {
+        // Use a try/catch block for parser exceptions
+        try {
+            // if we have at least one command-line argument
+            if (args.length > 0 ) {
+                System.err.println("Parsing...");
+
+                // for each directory/file specified on the command line
+                for(int i=0; i< args.length;i++) {
+                    if ( args[i].equals("-showtree") ) {
+                        showTree = true;
+                    }
+                    else {
+                        doFile(new File(args[i])); // parse it
+                    }
+                }
+            }
+            else {
+                System.err.println("Usage: java Main [-showtree] "+
+                                   "<directory or file name>");
+            }
+        }
+        catch(Exception e) {
+            System.err.println("exception: "+e);
+            e.printStackTrace(System.err);   // so we can get stack trace
+        }
+    }
+
+
+    /**
+     * Decides what to do with f based on what kind of file it is:
+     * directories are walked recursively, files whose name is longer than
+     * five characters and ends in ".java" are parsed, anything else is
+     * ignored.
+     *
+     * @param f file or directory to process
+     * @throws Exception if the file cannot be opened or its reader closed
+     */
+    public static void doFile(File f)
+                              throws Exception {
+        // If this is a directory, walk each file/dir in that directory
+        if (f.isDirectory()) {
+            String files[] = f.list();
+            if (files == null) {
+                // File.list() returns null when the directory cannot be
+                // read; report it instead of failing with a
+                // NullPointerException that would abort the whole run
+                System.err.println("cannot list directory: "+
+                                   f.getAbsolutePath());
+                return;
+            }
+            for(int i=0; i < files.length; i++)
+                doFile(new File(f, files[i]));
+        }
+
+        // otherwise, if this is a java file, parse it!
+        else if ((f.getName().length()>5) &&
+                f.getName().endsWith(".java")) {
+            System.err.println(" "+f.getAbsolutePath());
+            // a buffered Reader rather than a raw FileInputStream
+            Reader r = new BufferedReader(new FileReader(f));
+            try {
+                parseFile(f.getName(), r);
+            }
+            finally {
+                // release the file handle once this file has been processed
+                r.close();
+            }
+        }
+    }
+
+    /**
+     * Here's where we do the real work: lexes and parses one compilation
+     * unit and passes the resulting AST to doTreeAction.  Any exception
+     * raised on the way is printed to System.err and not rethrown.  The
+     * reader is left open; closing it is the caller's job.
+     *
+     * @param f name handed to the lexer's and parser's setFilename
+     * @param r source text to parse
+     */
+    public static void parseFile(String f, Reader r)
+                                 throws Exception {
+        try {
+            // Create a scanner that reads from the input stream passed to us
+            JavaLexer lexer = new JavaLexer(r);
+            lexer.setFilename(f);
+
+            // Create a parser that reads from the scanner
+            JavaRecognizer parser = new JavaRecognizer(lexer);
+            parser.setFilename(f);
+
+            // start parsing at the compilationUnit rule
+            parser.compilationUnit();
+
+            // do something with the tree
+            doTreeAction(f, parser.getAST(), parser.getTokenNames());
+        }
+        catch (Exception e) {
+            System.err.println("parser exception: "+e);
+            e.printStackTrace();   // so we can get stack trace
+        }
+    }
+
+    /**
+     * Walks the AST t with JavaTreeParser; when showTree is set it first
+     * opens an ASTFrame on the tree.  Closing that frame calls
+     * System.exit(0).  Does nothing when t is null.
+     *
+     * @param f          name of the parsed file (not used by the active code)
+     * @param t          AST produced by the parser, may be null
+     * @param tokenNames token names passed to setVerboseStringConversion
+     */
+    public static void doTreeAction(String f, AST t, String[] tokenNames) {
+        if ( t==null ) return;
+        if ( showTree ) {
+            ((CommonAST)t).setVerboseStringConversion(true, tokenNames);
+            ASTFactory factory = new ASTFactory();
+            AST r = factory.create(0,"AST ROOT");
+            r.setFirstChild(t);
+            final ASTFrame frame = new ASTFrame("Java AST", r);
+            frame.setVisible(true);
+            frame.addWindowListener(
+                new WindowAdapter() {
+                    public void windowClosing (WindowEvent e) {
+                        frame.setVisible(false); // hide the Frame
+                        frame.dispose();
+                        System.exit(0);
+                    }
+                }
+            );
+            // System.out.println(t.toStringList());
+        }
+        JavaTreeParser tparse = new JavaTreeParser();
+        try {
+            tparse.compilationUnit(t);
+            // System.out.println("successful walk of result AST for "+f);
+        }
+        catch (RecognitionException e) {
+            System.err.println(e.getMessage());
+            e.printStackTrace();
+        }
+
+    }
+}
+
diff --git a/topics/grammars/java/antlr-java-1.3/README.txt b/topics/grammars/java/antlr-java-1.3/README.txt
new file mode 100644
index 00000000..60f49c09
--- /dev/null
+++ b/topics/grammars/java/antlr-java-1.3/README.txt
@@ -0,0 +1,6 @@
+http://www.antlr.org/grammar/list:
+ ↳ http://www.antlr.org/grammar/java
+
+Java parser and tree parser
+Terence Parr Sun Jan 19, 2003 00:00
+Java: A Java 1.3 grammar written originally by (alphabetically) John Lilley, John Mitchell, Terence
Parr, and Scott Stanchfield from the language spec; parses, builds trees, and walks the trees (includes java.tree.g). Updated to 2.7.2. Last updated 1/19/2003. diff --git a/topics/grammars/java/antlr-java-1.3/java.g b/topics/grammars/java/antlr-java-1.3/java.g new file mode 100644 index 00000000..6d5e2c61 --- /dev/null +++ b/topics/grammars/java/antlr-java-1.3/java.g @@ -0,0 +1,1277 @@ +/** Java 1.3 Recognizer + * + * Run 'java Main [-showtree] directory-full-of-java-files' + * + * [The -showtree option pops up a Swing frame that shows + * the AST constructed from the parser.] + * + * Run 'java Main <directory full of java files>' + * + * Contributing authors: + * John Mitchell johnm@non.net + * Terence Parr parrt@magelang.com + * John Lilley jlilley@empathy.com + * Scott Stanchfield thetick@magelang.com + * Markus Mohnen mohnen@informatik.rwth-aachen.de + * Peter Williams pete.williams@sun.com + * Allan Jacobs Allan.Jacobs@eng.sun.com + * Steve Messick messick@redhills.com + * John Pybus john@pybus.org + * + * Version 1.00 December 9, 1997 -- initial release + * Version 1.01 December 10, 1997 + * fixed bug in octal def (0..7 not 0..8) + * Version 1.10 August 1998 (parrt) + * added tree construction + * fixed definition of WS,comments for mac,pc,unix newlines + * added unary plus + * Version 1.11 (Nov 20, 1998) + * Added "shutup" option to turn off last ambig warning. + * Fixed inner class def to allow named class defs as statements + * synchronized requires compound not simple statement + * add [] after builtInType DOT class in primaryExpression + * "const" is reserved but not valid..removed from modifiers + * Version 1.12 (Feb 2, 1999) + * Changed LITERAL_xxx to xxx in tree grammar. + * Updated java.g to use tokens {...} now for 2.6.0 (new feature). + * + * Version 1.13 (Apr 23, 1999) + * Didn't have (stat)? for else clause in tree parser. + * Didn't gen ASTs for interface extends. Updated tree parser too. + * Updated to 2.6.0. 
+ * Version 1.14 (Jun 20, 1999) + * Allowed final/abstract on local classes. + * Removed local interfaces from methods + * Put instanceof precedence where it belongs...in relationalExpr + * It also had expr not type as arg; fixed it. + * Missing ! on SEMI in classBlock + * fixed: (expr) + "string" was parsed incorrectly (+ as unary plus). + * fixed: didn't like Object[].class in parser or tree parser + * Version 1.15 (Jun 26, 1999) + * Screwed up rule with instanceof in it. :( Fixed. + * Tree parser didn't like (expr).something; fixed. + * Allowed multiple inheritance in tree grammar. oops. + * Version 1.16 (August 22, 1999) + * Extending an interface built a wacky tree: had extra EXTENDS. + * Tree grammar didn't allow multiple superinterfaces. + * Tree grammar didn't allow empty var initializer: {} + * Version 1.17 (October 12, 1999) + * ESC lexer rule allowed 399 max not 377 max. + * java.tree.g didn't handle the expression of synchronized + * statements. + * Version 1.18 (August 12, 2001) + * Terence updated to Java 2 Version 1.3 by + * observing/combining work of Allan Jacobs and Steve + * Messick. Handles 1.3 src. Summary: + * o primary didn't include boolean.class kind of thing + * o constructor calls parsed explicitly now: + * see explicitConstructorInvocation + * o add strictfp modifier + * o missing objBlock after new expression in tree grammar + * o merged local class definition alternatives, moved after declaration + * o fixed problem with ClassName.super.field + * o reordered some alternatives to make things more efficient + * o long and double constants were not differentiated from int/float + * o whitespace rule was inefficient: matched only one char + * o add an examples directory with some nasty 1.3 cases + * o made Main.java use buffered IO and a Reader for Unicode support + * o supports UNICODE? + * Using Unicode charVocabulary makes code file big, but only + * in the bitsets at the end. 
I need to make ANTLR generate + * unicode bitsets more efficiently. + * Version 1.19 (April 25, 2002) + * Terence added in nice fixes by John Pybus concerning floating + * constants and problems with super() calls. John did a nice + * reorg of the primary/postfix expression stuff to read better + * and makes f.g.super() parse properly (it was METHOD_CALL not + * a SUPER_CTOR_CALL). Also: + * + * o "finally" clause was a root...made it a child of "try" + * o Added stuff for asserts too for Java 1.4, but *commented out* + * as it is not backward compatible. + * + * Version 1.20 (October 27, 2002) + * + * Terence ended up reorging John Pybus' stuff to + * remove some nondeterminisms and some syntactic predicates. + * Note that the grammar is stricter now; e.g., this(...) must + * be the first statement. + * + * Trinary ?: operator wasn't working as array name: + * (isBig ? bigDigits : digits)[i]; + * + * Checked parser/tree parser on source for + * Resin-2.0.5, jive-2.1.1, jdk 1.3.1, Lucene, antlr 2.7.2a4, + * and the 110k-line jGuru server source. + * + * Version 1.21 (October 17, 2003) + * Fixed lots of problems including: + * Ray Waldin: add typeDefinition to interfaceBlock in java.tree.g + * He found a problem/fix with floating point that start with 0 + * Ray also fixed problem that (int.class) was not recognized. + * Thorsten van Ellen noticed that \n are allowed incorrectly in strings. + * TJP fixed CHAR_LITERAL analogously. 
+ * + * This grammar is in the PUBLIC DOMAIN + */ +class JavaRecognizer extends Parser; +options { + k = 2; // two token lookahead + exportVocab=Java; // Call its vocabulary "Java" + codeGenMakeSwitchThreshold = 2; // Some optimizations + codeGenBitsetTestThreshold = 3; + defaultErrorHandler = false; // Don't generate parser error handlers + buildAST = true; +} + +tokens { + BLOCK; MODIFIERS; OBJBLOCK; SLIST; CTOR_DEF; METHOD_DEF; VARIABLE_DEF; + INSTANCE_INIT; STATIC_INIT; TYPE; CLASS_DEF; INTERFACE_DEF; + PACKAGE_DEF; ARRAY_DECLARATOR; EXTENDS_CLAUSE; IMPLEMENTS_CLAUSE; + PARAMETERS; PARAMETER_DEF; LABELED_STAT; TYPECAST; INDEX_OP; + POST_INC; POST_DEC; METHOD_CALL; EXPR; ARRAY_INIT; + IMPORT; UNARY_MINUS; UNARY_PLUS; CASE_GROUP; ELIST; FOR_INIT; FOR_CONDITION; + FOR_ITERATOR; EMPTY_STAT; FINAL="final"; ABSTRACT="abstract"; + STRICTFP="strictfp"; SUPER_CTOR_CALL; CTOR_CALL; +} + +// Compilation Unit: In Java, this is a single file. This is the start +// rule for this parser +compilationUnit + : // A compilation unit starts with an optional package definition + ( packageDefinition + | /* nothing */ + ) + + // Next we have a series of zero or more import statements + ( importDefinition )* + + // Wrapping things up with any number of class or interface + // definitions + ( typeDefinition )* + + EOF! + ; + + +// Package statement: "package" followed by an identifier. +packageDefinition + options {defaultErrorHandler = true;} // let ANTLR handle errors + : p:"package"^ {#p.setType(PACKAGE_DEF);} identifier SEMI! + ; + + +// Import statement: import followed by a package or class name +importDefinition + options {defaultErrorHandler = true;} + : i:"import"^ {#i.setType(IMPORT);} identifierStar SEMI! + ; + +// A type definition in a file is either a class or interface definition. +typeDefinition + options {defaultErrorHandler = true;} + : m:modifiers! + ( classDefinition[#m] + | interfaceDefinition[#m] + ) + | SEMI! 
+ ; + +/** A declaration is the creation of a reference or primitive-type variable + * Create a separate Type/Var tree for each var in the var list. + */ +declaration! + : m:modifiers t:typeSpec[false] v:variableDefinitions[#m,#t] + {#declaration = #v;} + ; + +// A type specification is a type name with possible brackets afterwards +// (which would make it an array type). +typeSpec[boolean addImagNode] + : classTypeSpec[addImagNode] + | builtInTypeSpec[addImagNode] + ; + +// A class type specification is a class type with possible brackets afterwards +// (which would make it an array type). +classTypeSpec[boolean addImagNode] + : identifier (lb:LBRACK^ {#lb.setType(ARRAY_DECLARATOR);} RBRACK!)* + { + if ( addImagNode ) { + #classTypeSpec = #(#[TYPE,"TYPE"], #classTypeSpec); + } + } + ; + +// A builtin type specification is a builtin type with possible brackets +// afterwards (which would make it an array type). +builtInTypeSpec[boolean addImagNode] + : builtInType (lb:LBRACK^ {#lb.setType(ARRAY_DECLARATOR);} RBRACK!)* + { + if ( addImagNode ) { + #builtInTypeSpec = #(#[TYPE,"TYPE"], #builtInTypeSpec); + } + } + ; + +// A type name. which is either a (possibly qualified) class name or +// a primitive (builtin) type +type + : identifier + | builtInType + ; + +// The primitive types. +builtInType + : "void" + | "boolean" + | "byte" + | "char" + | "short" + | "int" + | "float" + | "long" + | "double" + ; + +// A (possibly-qualified) java identifier. We start with the first IDENT +// and expand its name by adding dots and following IDENTS +identifier + : IDENT ( DOT^ IDENT )* + ; + +identifierStar + : IDENT + ( DOT^ IDENT )* + ( DOT^ STAR )? + ; + +// A list of zero or more modifiers. 
We could have used (modifier)* in +// place of a call to modifiers, but I thought it was a good idea to keep +// this rule separate so they can easily be collected in a Vector if +// someone so desires +modifiers + : ( modifier )* + {#modifiers = #([MODIFIERS, "MODIFIERS"], #modifiers);} + ; + +// modifiers for Java classes, interfaces, class/instance vars and methods +modifier + : "private" + | "public" + | "protected" + | "static" + | "transient" + | "final" + | "abstract" + | "native" + | "threadsafe" + | "synchronized" +// | "const" // reserved word, but not valid + | "volatile" + | "strictfp" + ; + +// Definition of a Java class +classDefinition![AST modifiers] + : "class" IDENT + // it _might_ have a superclass... + sc:superClassClause + // it might implement some interfaces... + ic:implementsClause + // now parse the body of the class + cb:classBlock + {#classDefinition = #(#[CLASS_DEF,"CLASS_DEF"], + modifiers,IDENT,sc,ic,cb);} + ; + +superClassClause! + : ( "extends" id:identifier )? + {#superClassClause = #(#[EXTENDS_CLAUSE,"EXTENDS_CLAUSE"],id);} + ; + +// Definition of a Java Interface +interfaceDefinition![AST modifiers] + : "interface" IDENT + // it might extend some other interfaces + ie:interfaceExtends + // now parse the body of the interface (looks like a class...) + cb:classBlock + {#interfaceDefinition = #(#[INTERFACE_DEF,"INTERFACE_DEF"], + modifiers,IDENT,ie,cb);} + ; + + +// This is the body of a class. You can have fields and extra semicolons, +// That's about it (until you see what a field is...) +classBlock + : LCURLY! + ( field | SEMI! )* + RCURLY! + {#classBlock = #([OBJBLOCK, "OBJBLOCK"], #classBlock);} + ; + +// An interface can extend several other interfaces... +interfaceExtends + : ( + e:"extends"! + identifier ( COMMA! identifier )* + )? + {#interfaceExtends = #(#[EXTENDS_CLAUSE,"EXTENDS_CLAUSE"], + #interfaceExtends);} + ; + +// A class can implement several interfaces... +implementsClause + : ( + i:"implements"! 
identifier ( COMMA! identifier )* + )? + {#implementsClause = #(#[IMPLEMENTS_CLAUSE,"IMPLEMENTS_CLAUSE"], + #implementsClause);} + ; + +// Now the various things that can be defined inside a class or interface... +// Note that not all of these are really valid in an interface (constructors, +// for example), and if this grammar were used for a compiler there would +// need to be some semantic checks to make sure we're doing the right thing... +field! + : // method, constructor, or variable declaration + mods:modifiers + ( h:ctorHead s:constructorBody // constructor + {#field = #(#[CTOR_DEF,"CTOR_DEF"], mods, h, s);} + + | cd:classDefinition[#mods] // inner class + {#field = #cd;} + + | id:interfaceDefinition[#mods] // inner interface + {#field = #id;} + + | t:typeSpec[false] // method or variable declaration(s) + ( IDENT // the name of the method + + // parse the formal parameter declarations. + LPAREN! param:parameterDeclarationList RPAREN! + + rt:declaratorBrackets[#t] + + // get the list of exceptions that this method is + // declared to throw + (tc:throwsClause)? + + ( s2:compoundStatement | SEMI ) + {#field = #(#[METHOD_DEF,"METHOD_DEF"], + mods, + #(#[TYPE,"TYPE"],rt), + IDENT, + param, + tc, + s2);} + | v:variableDefinitions[#mods,#t] SEMI +// {#field = #(#[VARIABLE_DEF,"VARIABLE_DEF"], v);} + {#field = #v;} + ) + ) + + // "static { ... }" class initializer + | "static" s3:compoundStatement + {#field = #(#[STATIC_INIT,"STATIC_INIT"], s3);} + + // "{ ... }" instance initializer + | s4:compoundStatement + {#field = #(#[INSTANCE_INIT,"INSTANCE_INIT"], s4);} + ; + +constructorBody + : lc:LCURLY^ {#lc.setType(SLIST);} + ( options { greedy=true; } : explicitConstructorInvocation)? + (statement)* + RCURLY! + ; + +/** Catch obvious constructor calls, but not the expr.super(...) calls */ +explicitConstructorInvocation + : "this"! lp1:LPAREN^ argList RPAREN! SEMI! + {#lp1.setType(CTOR_CALL);} + | "super"! lp2:LPAREN^ argList RPAREN! SEMI! 
+ {#lp2.setType(SUPER_CTOR_CALL);} + ; + +variableDefinitions[AST mods, AST t] + : variableDeclarator[getASTFactory().dupTree(mods), + getASTFactory().dupTree(t)] + ( COMMA! + variableDeclarator[getASTFactory().dupTree(mods), + getASTFactory().dupTree(t)] + )* + ; + +/** Declaration of a variable. This can be a class/instance variable, + * or a local variable in a method + * It can also include possible initialization. + */ +variableDeclarator![AST mods, AST t] + : id:IDENT d:declaratorBrackets[t] v:varInitializer + {#variableDeclarator = #(#[VARIABLE_DEF,"VARIABLE_DEF"], mods, #(#[TYPE,"TYPE"],d), id, v);} + ; + +declaratorBrackets[AST typ] + : {#declaratorBrackets=typ;} + (lb:LBRACK^ {#lb.setType(ARRAY_DECLARATOR);} RBRACK!)* + ; + +varInitializer + : ( ASSIGN^ initializer )? + ; + +// This is an initializer used to set up an array. +arrayInitializer + : lc:LCURLY^ {#lc.setType(ARRAY_INIT);} + ( initializer + ( + // CONFLICT: does a COMMA after an initializer start a new + // initializer or start the option ',' at end? + // ANTLR generates proper code by matching + // the comma as soon as possible. + options { + warnWhenFollowAmbig = false; + } + : + COMMA! initializer + )* + (COMMA!)? + )? + RCURLY! + ; + + +// The two "things" that can initialize an array element are an expression +// and another (nested) array initializer. +initializer + : expression + | arrayInitializer + ; + +// This is the header of a method. It includes the name and parameters +// for the method. +// This also watches for a list of exception classes in a "throws" clause. +ctorHead + : IDENT // the name of the method + + // parse the formal parameter declarations. + LPAREN! parameterDeclarationList RPAREN! + + // get the list of exceptions that this method is declared to throw + (throwsClause)? + ; + +// This is a list of exception classes that the method is declared to throw +throwsClause + : "throws"^ identifier ( COMMA! 
identifier )* + ; + + +// A list of formal parameters +parameterDeclarationList + : ( parameterDeclaration ( COMMA! parameterDeclaration )* )? + {#parameterDeclarationList = #(#[PARAMETERS,"PARAMETERS"], + #parameterDeclarationList);} + ; + +// A formal parameter. +parameterDeclaration! + : pm:parameterModifier t:typeSpec[false] id:IDENT + pd:declaratorBrackets[#t] + {#parameterDeclaration = #(#[PARAMETER_DEF,"PARAMETER_DEF"], + pm, #([TYPE,"TYPE"],pd), id);} + ; + +parameterModifier + : (f:"final")? + {#parameterModifier = #(#[MODIFIERS,"MODIFIERS"], f);} + ; + +// Compound statement. This is used in many contexts: +// Inside a class definition prefixed with "static": +// it is a class initializer +// Inside a class definition without "static": +// it is an instance initializer +// As the body of a method +// As a completely indepdent braced block of code inside a method +// it starts a new scope for variable definitions + +compoundStatement + : lc:LCURLY^ {#lc.setType(SLIST);} + // include the (possibly-empty) list of statements + (statement)* + RCURLY! + ; + + +statement + // A list of statements in curly braces -- start a new scope! + : compoundStatement + + // declarations are ambiguous with "ID DOT" relative to expression + // statements. Must backtrack to be sure. Could use a semantic + // predicate to test symbol table to see what the type was coming + // up, but that's pretty hard without a symbol table ;) + | (declaration)=> declaration SEMI! + + // An expression statement. This could be a method call, + // assignment statement, or any other expression evaluated for + // side-effects. + | expression SEMI! + + // class definition + | m:modifiers! classDefinition[#m] + + // Attach a label to the front of a statement + | IDENT c:COLON^ {#c.setType(LABELED_STAT);} statement + + // If-else statement + | "if"^ LPAREN! expression RPAREN! statement + ( + // CONFLICT: the old "dangling-else" problem... 
+ // ANTLR generates proper code matching + // as soon as possible. Hush warning. + options { + warnWhenFollowAmbig = false; + } + : + "else"! statement + )? + + // For statement + | "for"^ + LPAREN! + forInit SEMI! // initializer + forCond SEMI! // condition test + forIter // updater + RPAREN! + statement // statement to loop over + + // While statement + | "while"^ LPAREN! expression RPAREN! statement + + // do-while statement + | "do"^ statement "while"! LPAREN! expression RPAREN! SEMI! + + // get out of a loop (or switch) + | "break"^ (IDENT)? SEMI! + + // do next iteration of a loop + | "continue"^ (IDENT)? SEMI! + + // Return an expression + | "return"^ (expression)? SEMI! + + // switch/case statement + | "switch"^ LPAREN! expression RPAREN! LCURLY! + ( casesGroup )* + RCURLY! + + // exception try-catch block + | tryBlock + + // throw an exception + | "throw"^ expression SEMI! + + // synchronize a statement + | "synchronized"^ LPAREN! expression RPAREN! compoundStatement + + // asserts (uncomment if you want 1.4 compatibility) + // | "assert"^ expression ( COLON! expression )? SEMI! + + // empty statement + | s:SEMI {#s.setType(EMPTY_STAT);} + ; + +casesGroup + : ( // CONFLICT: to which case group do the statements bind? + // ANTLR generates proper code: it groups the + // many "case"/"default" labels together then + // follows them with the statements + options { + greedy = true; + } + : + aCase + )+ + caseSList + {#casesGroup = #([CASE_GROUP, "CASE_GROUP"], #casesGroup);} + ; + +aCase + : ("case"^ expression | "default") COLON! + ; + +caseSList + : (statement)* + {#caseSList = #(#[SLIST,"SLIST"],#caseSList);} + ; + +// The initializer for a for loop +forInit + // if it looks like a declaration, it is + : ( (declaration)=> declaration + // otherwise it could be an expression list... + | expressionList + )? + {#forInit = #(#[FOR_INIT,"FOR_INIT"],#forInit);} + ; + +forCond + : (expression)? 
+ {#forCond = #(#[FOR_CONDITION,"FOR_CONDITION"],#forCond);} + ; + +forIter + : (expressionList)? + {#forIter = #(#[FOR_ITERATOR,"FOR_ITERATOR"],#forIter);} + ; + +// an exception handler try/catch block +tryBlock + : "try"^ compoundStatement + (handler)* + ( finallyClause )? + ; + +finallyClause + : "finally"^ compoundStatement + ; + +// an exception handler +handler + : "catch"^ LPAREN! parameterDeclaration RPAREN! compoundStatement + ; + + +// expressions +// Note that most of these expressions follow the pattern +// thisLevelExpression : +// nextHigherPrecedenceExpression +// (OPERATOR nextHigherPrecedenceExpression)* +// which is a standard recursive definition for a parsing an expression. +// The operators in java have the following precedences: +// lowest (13) = *= /= %= += -= <<= >>= >>>= &= ^= |= +// (12) ?: +// (11) || +// (10) && +// ( 9) | +// ( 8) ^ +// ( 7) & +// ( 6) == != +// ( 5) < <= > >= +// ( 4) << >> +// ( 3) +(binary) -(binary) +// ( 2) * / % +// ( 1) ++ -- +(unary) -(unary) ~ ! (type) +// [] () (method call) . (dot -- identifier qualification) +// new () (explicit parenthesis) +// +// the last two are not usually on a precedence chart; I put them in +// to point out that new has a higher precedence than '.', so you +// can validy use +// new Frame().show() +// +// Note that the above precedence levels map to the rules below... +// Once you have a precedence chart, writing the appropriate rules as below +// is usually very straightfoward + + + +// the mother of all expressions +expression + : assignmentExpression + {#expression = #(#[EXPR,"EXPR"],#expression);} + ; + + +// This is a list of expressions. +expressionList + : expression (COMMA! 
expression)* + {#expressionList = #(#[ELIST,"ELIST"], expressionList);} + ; + + +// assignment expression (level 13) +assignmentExpression + : conditionalExpression + ( ( ASSIGN^ + | PLUS_ASSIGN^ + | MINUS_ASSIGN^ + | STAR_ASSIGN^ + | DIV_ASSIGN^ + | MOD_ASSIGN^ + | SR_ASSIGN^ + | BSR_ASSIGN^ + | SL_ASSIGN^ + | BAND_ASSIGN^ + | BXOR_ASSIGN^ + | BOR_ASSIGN^ + ) + assignmentExpression + )? + ; + + +// conditional test (level 12) +conditionalExpression + : logicalOrExpression + ( QUESTION^ assignmentExpression COLON! conditionalExpression )? + ; + + +// logical or (||) (level 11) +logicalOrExpression + : logicalAndExpression (LOR^ logicalAndExpression)* + ; + + +// logical and (&&) (level 10) +logicalAndExpression + : inclusiveOrExpression (LAND^ inclusiveOrExpression)* + ; + + +// bitwise or non-short-circuiting or (|) (level 9) +inclusiveOrExpression + : exclusiveOrExpression (BOR^ exclusiveOrExpression)* + ; + + +// exclusive or (^) (level 8) +exclusiveOrExpression + : andExpression (BXOR^ andExpression)* + ; + + +// bitwise or non-short-circuiting and (&) (level 7) +andExpression + : equalityExpression (BAND^ equalityExpression)* + ; + + +// equality/inequality (==/!=) (level 6) +equalityExpression + : relationalExpression ((NOT_EQUAL^ | EQUAL^) relationalExpression)* + ; + + +// boolean relational expressions (level 5) +relationalExpression + : shiftExpression + ( ( ( LT^ + | GT^ + | LE^ + | GE^ + ) + shiftExpression + )* + | "instanceof"^ typeSpec[true] + ) + ; + + +// bit shift expressions (level 4) +shiftExpression + : additiveExpression ((SL^ | SR^ | BSR^) additiveExpression)* + ; + + +// binary addition/subtraction (level 3) +additiveExpression + : multiplicativeExpression ((PLUS^ | MINUS^) multiplicativeExpression)* + ; + + +// multiplication/division/modulo (level 2) +multiplicativeExpression + : unaryExpression ((STAR^ | DIV^ | MOD^ ) unaryExpression)* + ; + +unaryExpression + : INC^ unaryExpression + | DEC^ unaryExpression + | MINUS^ 
{#MINUS.setType(UNARY_MINUS);} unaryExpression + | PLUS^ {#PLUS.setType(UNARY_PLUS);} unaryExpression + | unaryExpressionNotPlusMinus + ; + +unaryExpressionNotPlusMinus + : BNOT^ unaryExpression + | LNOT^ unaryExpression + + // use predicate to skip cases like: (int.class) + | (LPAREN builtInTypeSpec[true] RPAREN) => + lpb:LPAREN^ {#lpb.setType(TYPECAST);} builtInTypeSpec[true] RPAREN! + unaryExpression + + // Have to backtrack to see if operator follows. If no operator + // follows, it's a typecast. No semantic checking needed to parse. + // if it _looks_ like a cast, it _is_ a cast; else it's a "(expr)" + | (LPAREN classTypeSpec[true] RPAREN unaryExpressionNotPlusMinus)=> + lp:LPAREN^ {#lp.setType(TYPECAST);} classTypeSpec[true] RPAREN! + unaryExpressionNotPlusMinus + + | postfixExpression + ; + +// qualified names, array expressions, method invocation, post inc/dec +postfixExpression + : + /* + "this"! lp1:LPAREN^ argList RPAREN! + {#lp1.setType(CTOR_CALL);} + + | "super"! lp2:LPAREN^ argList RPAREN! + {#lp2.setType(SUPER_CTOR_CALL);} + | + */ + primaryExpression + + ( + /* + options { + // the use of postfixExpression in SUPER_CTOR_CALL adds DOT + // to the lookahead set, and gives loads of false non-det + // warnings. + // shut them off. + generateAmbigWarnings=false; + } + : */ + DOT^ IDENT + ( lp:LPAREN^ {#lp.setType(METHOD_CALL);} + argList + RPAREN! + )? + | DOT^ "this" + + | DOT^ "super" + ( // (new Outer()).super() (create enclosing instance) + lp3:LPAREN^ argList RPAREN! + {#lp3.setType(SUPER_CTOR_CALL);} + | DOT^ IDENT + ( lps:LPAREN^ {#lps.setType(METHOD_CALL);} + argList + RPAREN! + )? + ) + | DOT^ newExpression + | lb:LBRACK^ {#lb.setType(INDEX_OP);} expression RBRACK! + )* + + ( // possibly add on a post-increment or post-decrement. + // allows INC/DEC on too much, but semantics can check + in:INC^ {#in.setType(POST_INC);} + | de:DEC^ {#de.setType(POST_DEC);} + )? 
+ ; + +// the basic element of an expression +primaryExpression + : identPrimary ( options {greedy=true;} : DOT^ "class" )? + | constant + | "true" + | "false" + | "null" + | newExpression + | "this" + | "super" + | LPAREN! assignmentExpression RPAREN! + // look for int.class and int[].class + | builtInType + ( lbt:LBRACK^ {#lbt.setType(ARRAY_DECLARATOR);} RBRACK! )* + DOT^ "class" + ; + +/** Match a, a.b.c refs, a.b.c(...) refs, a.b.c[], a.b.c[].class, + * and a.b.c.class refs. Also this(...) and super(...). Match + * this or super. + */ +identPrimary + : IDENT + ( + options { + // .ident could match here or in postfixExpression. + // We do want to match here. Turn off warning. + greedy=true; + } + : DOT^ IDENT + )* + ( + options { + // ARRAY_DECLARATOR here conflicts with INDEX_OP in + // postfixExpression on LBRACK RBRACK. + // We want to match [] here, so greedy. This overcomes + // limitation of linear approximate lookahead. + greedy=true; + } + : ( lp:LPAREN^ {#lp.setType(METHOD_CALL);} argList RPAREN! ) + | ( options {greedy=true;} : + lbc:LBRACK^ {#lbc.setType(ARRAY_DECLARATOR);} RBRACK! + )+ + )? + ; + +/** object instantiation. + * Trees are built as illustrated by the following input/tree pairs: + * + * new T() + * + * new + * | + * T -- ELIST + * | + * arg1 -- arg2 -- .. -- argn + * + * new int[] + * + * new + * | + * int -- ARRAY_DECLARATOR + * + * new int[] {1,2} + * + * new + * | + * int -- ARRAY_DECLARATOR -- ARRAY_INIT + * | + * EXPR -- EXPR + * | | + * 1 2 + * + * new int[3] + * new + * | + * int -- ARRAY_DECLARATOR + * | + * EXPR + * | + * 3 + * + * new int[1][2] + * + * new + * | + * int -- ARRAY_DECLARATOR + * | + * ARRAY_DECLARATOR -- EXPR + * | | + * EXPR 1 + * | + * 2 + * + */ +newExpression + : "new"^ type + ( LPAREN! argList RPAREN! (classBlock)? + + //java 1.1 + // Note: This will allow bad constructs like + // new int[4][][3] {exp,exp}. + // There needs to be a semantic check here... 
+ // to make sure: + // a) [ expr ] and [ ] are not mixed + // b) [ expr ] and an init are not used together + + | newArrayDeclarator (arrayInitializer)? + ) + ; + +argList + : ( expressionList + | /*nothing*/ + {#argList = #[ELIST,"ELIST"];} + ) + ; + +newArrayDeclarator + : ( + // CONFLICT: + // newExpression is a primaryExpression which can be + // followed by an array index reference. This is ok, + // as the generated code will stay in this loop as + // long as it sees an LBRACK (proper behavior) + options { + warnWhenFollowAmbig = false; + } + : + lb:LBRACK^ {#lb.setType(ARRAY_DECLARATOR);} + (expression)? + RBRACK! + )+ + ; + +constant + : NUM_INT + | CHAR_LITERAL + | STRING_LITERAL + | NUM_FLOAT + | NUM_LONG + | NUM_DOUBLE + ; + + +//---------------------------------------------------------------------------- +// The Java scanner +//---------------------------------------------------------------------------- +class JavaLexer extends Lexer; + +options { + exportVocab=Java; // call the vocabulary "Java" + testLiterals=false; // don't automatically test for literals + k=4; // four characters of lookahead + charVocabulary='\u0003'..'\uFFFF'; + // without inlining some bitset tests, couldn't do unicode; + // I need to make ANTLR generate smaller bitsets; see + // bottom of JavaLexer.java + codeGenBitsetTestThreshold=20; +} + + + +// OPERATORS +QUESTION : '?' ; +LPAREN : '(' ; +RPAREN : ')' ; +LBRACK : '[' ; +RBRACK : ']' ; +LCURLY : '{' ; +RCURLY : '}' ; +COLON : ':' ; +COMMA : ',' ; +//DOT : '.' ; +ASSIGN : '=' ; +EQUAL : "==" ; +LNOT : '!' 
; +BNOT : '~' ; +NOT_EQUAL : "!=" ; +DIV : '/' ; +DIV_ASSIGN : "/=" ; +PLUS : '+' ; +PLUS_ASSIGN : "+=" ; +INC : "++" ; +MINUS : '-' ; +MINUS_ASSIGN : "-=" ; +DEC : "--" ; +STAR : '*' ; +STAR_ASSIGN : "*=" ; +MOD : '%' ; +MOD_ASSIGN : "%=" ; +SR : ">>" ; +SR_ASSIGN : ">>=" ; +BSR : ">>>" ; +BSR_ASSIGN : ">>>=" ; +GE : ">=" ; +GT : ">" ; +SL : "<<" ; +SL_ASSIGN : "<<=" ; +LE : "<=" ; +LT : '<' ; +BXOR : '^' ; +BXOR_ASSIGN : "^=" ; +BOR : '|' ; +BOR_ASSIGN : "|=" ; +LOR : "||" ; +BAND : '&' ; +BAND_ASSIGN : "&=" ; +LAND : "&&" ; +SEMI : ';' ; + + +// Whitespace -- ignored +WS : ( ' ' + | '\t' + | '\f' + // handle newlines + | ( options {generateAmbigWarnings=false;} + : "\r\n" // Evil DOS + | '\r' // Macintosh + | '\n' // Unix (the right way) + ) + { newline(); } + )+ + { _ttype = Token.SKIP; } + ; + +// Single-line comments +SL_COMMENT + : "//" + (~('\n'|'\r'))* ('\n'|'\r'('\n')?) + {$setType(Token.SKIP); newline();} + ; + +// multiple-line comments +ML_COMMENT + : "/*" + ( /* '\r' '\n' can be matched in one alternative or by matching + '\r' in one iteration and '\n' in another. I am trying to + handle any flavor of newline that comes in, but the language + that allows both "\r\n" and "\r" and "\n" to all be valid + newline is ambiguous. Consequently, the resulting grammar + must be ambiguous. I'm shutting this warning off. + */ + options { + generateAmbigWarnings=false; + } + : + { LA(2)!='/' }? '*' + | '\r' '\n' {newline();} + | '\r' {newline();} + | '\n' {newline();} + | ~('*'|'\n'|'\r') + )* + "*/" + {$setType(Token.SKIP);} + ; + + +// character literals +CHAR_LITERAL + : '\'' ( ESC | ~('\''|'\n'|'\r'|'\\') ) '\'' + ; + +// string literals +STRING_LITERAL + : '"' (ESC|~('"'|'\\'|'\n'|'\r'))* '"' + ; + + +// escape sequence -- note that this is protected; it can only be called +// from another lexer rule -- it will not ever directly return a token to +// the parser +// There are various ambiguities hushed in this rule. 
The optional +// '0'...'9' digit matches should be matched here rather than letting +// them go back to STRING_LITERAL to be matched. ANTLR does the +// right thing by matching immediately; hence, it's ok to shut off +// the FOLLOW ambig warnings. +protected +ESC + : '\\' + ( 'n' + | 'r' + | 't' + | 'b' + | 'f' + | '"' + | '\'' + | '\\' + | ('u')+ HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT + | '0'..'3' + ( + options { + warnWhenFollowAmbig = false; + } + : '0'..'7' + ( + options { + warnWhenFollowAmbig = false; + } + : '0'..'7' + )? + )? + | '4'..'7' + ( + options { + warnWhenFollowAmbig = false; + } + : '0'..'7' + )? + ) + ; + + +// hexadecimal digit (again, note it's protected!) +protected +HEX_DIGIT + : ('0'..'9'|'A'..'F'|'a'..'f') + ; + + +// a dummy rule to force vocabulary to be all characters (except special +// ones that ANTLR uses internally (0 to 2)) +protected +VOCAB + : '\3'..'\377' + ; + + +// an identifier. Note that testLiterals is set to true! This means +// that after we match the rule, we look in the literals table to see +// if it's a literal or really an identifier +IDENT + options {testLiterals=true;} + : ('a'..'z'|'A'..'Z'|'_'|'$') ('a'..'z'|'A'..'Z'|'_'|'0'..'9'|'$')* + ; + + +// a numeric literal +NUM_INT + {boolean isDecimal=false; Token t=null;} + : '.' {_ttype = DOT;} + ( ('0'..'9')+ (EXPONENT)? (f1:FLOAT_SUFFIX {t=f1;})? + { + if (t != null && t.getText().toUpperCase().indexOf('F')>=0) { + _ttype = NUM_FLOAT; + } + else { + _ttype = NUM_DOUBLE; // assume double + } + } + )? + + | ( '0' {isDecimal = true;} // special case for just '0' + ( ('x'|'X') + ( // hex + // the 'e'|'E' and float suffix stuff look + // like hex digits, hence the (...)+ doesn't + // know when to stop: ambig. ANTLR resolves + // it correctly by matching immediately. It + // is therefore ok to hush warning.
+ options { + warnWhenFollowAmbig=false; + } + : HEX_DIGIT + )+ + + | //float or double with leading zero + (('0'..'9')+ ('.'|EXPONENT|FLOAT_SUFFIX)) => ('0'..'9')+ + + | ('0'..'7')+ // octal + )? + | ('1'..'9') ('0'..'9')* {isDecimal=true;} // non-zero decimal + ) + ( ('l'|'L') { _ttype = NUM_LONG; } + + // only check to see if it's a float if looks like decimal so far + | {isDecimal}? + ( '.' ('0'..'9')* (EXPONENT)? (f2:FLOAT_SUFFIX {t=f2;})? + | EXPONENT (f3:FLOAT_SUFFIX {t=f3;})? + | f4:FLOAT_SUFFIX {t=f4;} + ) + { + if (t != null && t.getText().toUpperCase() .indexOf('F') >= 0) { + _ttype = NUM_FLOAT; + } + else { + _ttype = NUM_DOUBLE; // assume double + } + } + )? + ; + + +// a couple protected methods to assist in matching floating point numbers +protected +EXPONENT + : ('e'|'E') ('+'|'-')? ('0'..'9')+ + ; + + +protected +FLOAT_SUFFIX + : 'f'|'F'|'d'|'D' + ; + diff --git a/topics/grammars/java/antlr-java-1.3/java.tree.g b/topics/grammars/java/antlr-java-1.3/java.tree.g new file mode 100644 index 00000000..60b45d49 --- /dev/null +++ b/topics/grammars/java/antlr-java-1.3/java.tree.g @@ -0,0 +1,324 @@ +/** Java 1.3 AST Recognizer Grammar + * + * Author: (see java.g preamble) + * + * This grammar is in the PUBLIC DOMAIN + */ +class JavaTreeParser extends TreeParser; + +options { + importVocab = Java; +} + +compilationUnit + : (packageDefinition)? 
+ (importDefinition)* + (typeDefinition)* + ; + +packageDefinition + : #( PACKAGE_DEF identifier ) + ; + +importDefinition + : #( IMPORT identifierStar ) + ; + +typeDefinition + : #(CLASS_DEF modifiers IDENT extendsClause implementsClause objBlock ) + | #(INTERFACE_DEF modifiers IDENT extendsClause interfaceBlock ) + ; + +typeSpec + : #(TYPE typeSpecArray) + ; + +typeSpecArray + : #( ARRAY_DECLARATOR typeSpecArray ) + | type + ; + +type: identifier + | builtInType + ; + +builtInType + : "void" + | "boolean" + | "byte" + | "char" + | "short" + | "int" + | "float" + | "long" + | "double" + ; + +modifiers + : #( MODIFIERS (modifier)* ) + ; + +modifier + : "private" + | "public" + | "protected" + | "static" + | "transient" + | "final" + | "abstract" + | "native" + | "threadsafe" + | "synchronized" + | "const" + | "volatile" + | "strictfp" + ; + +extendsClause + : #(EXTENDS_CLAUSE (identifier)* ) + ; + +implementsClause + : #(IMPLEMENTS_CLAUSE (identifier)* ) + ; + + +interfaceBlock + : #( OBJBLOCK + ( methodDecl + | variableDef + | typeDefinition + )* + ) + ; + +objBlock + : #( OBJBLOCK + ( ctorDef + | methodDef + | variableDef + | typeDefinition + | #(STATIC_INIT slist) + | #(INSTANCE_INIT slist) + )* + ) + ; + +ctorDef + : #(CTOR_DEF modifiers methodHead (slist)?) + ; + +methodDecl + : #(METHOD_DEF modifiers typeSpec methodHead) + ; + +methodDef + : #(METHOD_DEF modifiers typeSpec methodHead (slist)?) + ; + +variableDef + : #(VARIABLE_DEF modifiers typeSpec variableDeclarator varInitializer) + ; + +parameterDef + : #(PARAMETER_DEF modifiers typeSpec IDENT ) + ; + +objectinitializer + : #(INSTANCE_INIT slist) + ; + +variableDeclarator + : IDENT + | LBRACK variableDeclarator + ; + +varInitializer + : #(ASSIGN initializer) + | + ; + +initializer + : expression + | arrayInitializer + ; + +arrayInitializer + : #(ARRAY_INIT (initializer)*) + ; + +methodHead + : IDENT #( PARAMETERS (parameterDef)* ) (throwsClause)? 
+ ; + +throwsClause + : #( "throws" (identifier)* ) + ; + +identifier + : IDENT + | #( DOT identifier IDENT ) + ; + +identifierStar + : IDENT + | #( DOT identifier (STAR|IDENT) ) + ; + +slist + : #( SLIST (stat)* ) + ; + +stat: typeDefinition + | variableDef + | expression + | #(LABELED_STAT IDENT stat) + | #("if" expression stat (stat)? ) + | #( "for" + #(FOR_INIT ((variableDef)+ | elist)?) + #(FOR_CONDITION (expression)?) + #(FOR_ITERATOR (elist)?) + stat + ) + | #("while" expression stat) + | #("do" stat expression) + | #("break" (IDENT)? ) + | #("continue" (IDENT)? ) + | #("return" (expression)? ) + | #("switch" expression (caseGroup)*) + | #("throw" expression) + | #("synchronized" expression stat) + | tryBlock + | slist // nested SLIST + // uncomment to make assert JDK 1.4 stuff work + // | #("assert" expression (expression)?) + | EMPTY_STAT + ; + +caseGroup + : #(CASE_GROUP (#("case" expression) | "default")+ slist) + ; + +tryBlock + : #( "try" slist (handler)* (#("finally" slist))? ) + ; + +handler + : #( "catch" parameterDef slist ) + ; + +elist + : #( ELIST (expression)* ) + ; + +expression + : #(EXPR expr) + ; + +expr: #(QUESTION expr expr expr) // trinary operator + | #(ASSIGN expr expr) // binary operators... 
+ | #(PLUS_ASSIGN expr expr) + | #(MINUS_ASSIGN expr expr) + | #(STAR_ASSIGN expr expr) + | #(DIV_ASSIGN expr expr) + | #(MOD_ASSIGN expr expr) + | #(SR_ASSIGN expr expr) + | #(BSR_ASSIGN expr expr) + | #(SL_ASSIGN expr expr) + | #(BAND_ASSIGN expr expr) + | #(BXOR_ASSIGN expr expr) + | #(BOR_ASSIGN expr expr) + | #(LOR expr expr) + | #(LAND expr expr) + | #(BOR expr expr) + | #(BXOR expr expr) + | #(BAND expr expr) + | #(NOT_EQUAL expr expr) + | #(EQUAL expr expr) + | #(LT expr expr) + | #(GT expr expr) + | #(LE expr expr) + | #(GE expr expr) + | #(SL expr expr) + | #(SR expr expr) + | #(BSR expr expr) + | #(PLUS expr expr) + | #(MINUS expr expr) + | #(DIV expr expr) + | #(MOD expr expr) + | #(STAR expr expr) + | #(INC expr) + | #(DEC expr) + | #(POST_INC expr) + | #(POST_DEC expr) + | #(BNOT expr) + | #(LNOT expr) + | #("instanceof" expr expr) + | #(UNARY_MINUS expr) + | #(UNARY_PLUS expr) + | primaryExpression + ; + +primaryExpression + : IDENT + | #( DOT + ( expr + ( IDENT + | arrayIndex + | "this" + | "class" + | #( "new" IDENT elist ) + | "super" + ) + | #(ARRAY_DECLARATOR typeSpecArray) + | builtInType ("class")? + ) + ) + | arrayIndex + | #(METHOD_CALL primaryExpression elist) + | ctorCall + | #(TYPECAST typeSpec expr) + | newExpression + | constant + | "super" + | "true" + | "false" + | "this" + | "null" + | typeSpec // type name used with instanceof + ; + +ctorCall + : #( CTOR_CALL elist ) + | #( SUPER_CTOR_CALL + ( elist + | primaryExpression elist + ) + ) + ; + +arrayIndex + : #(INDEX_OP expr expression) + ; + +constant + : NUM_INT + | CHAR_LITERAL + | STRING_LITERAL + | NUM_FLOAT + | NUM_DOUBLE + | NUM_LONG + ; + +newExpression + : #( "new" type + ( newArrayDeclarator (arrayInitializer)? + | elist (objBlock)? + ) + ) + + ; + +newArrayDeclarator + : #( ARRAY_DECLARATOR (newArrayDeclarator)? (expression)? 
) + ; diff --git a/topics/grammars/java/antlr-java-1.3/shiplist b/topics/grammars/java/antlr-java-1.3/shiplist new file mode 100644 index 00000000..df896c93 --- /dev/null +++ b/topics/grammars/java/antlr-java-1.3/shiplist @@ -0,0 +1,5 @@ +java.g +java.tree.g +Main.java +tests/E.java +tests/T.java diff --git a/topics/grammars/java/antlr-java-1.3/tests/A.java b/topics/grammars/java/antlr-java-1.3/tests/A.java new file mode 100644 index 00000000..e1d006e7 --- /dev/null +++ b/topics/grammars/java/antlr-java-1.3/tests/A.java @@ -0,0 +1,12 @@ +class A { + void m() { + class E { + } + strictfp class F { + } + final class G { + } + final strictfp class H { + } + } +} diff --git a/topics/grammars/java/antlr-java-1.3/tests/B.java b/topics/grammars/java/antlr-java-1.3/tests/B.java new file mode 100644 index 00000000..d3a8a322 --- /dev/null +++ b/topics/grammars/java/antlr-java-1.3/tests/B.java @@ -0,0 +1,5 @@ +public class B { + public a() { + x = Object[][].class; + } +} diff --git a/topics/grammars/java/antlr-java-1.3/tests/C.java b/topics/grammars/java/antlr-java-1.3/tests/C.java new file mode 100644 index 00000000..a1bc4dd9 --- /dev/null +++ b/topics/grammars/java/antlr-java-1.3/tests/C.java @@ -0,0 +1,25 @@ +public class C { + int x; + public static void main( String args[] ) { + Class c = Integer.class; + int.class; + boolean.class.equals(T.class); + } +} + +class Duh extends C { + public Duh(int i) {i=3;} + public Duh() { + super(); + } + public Duh(Double d) { + this.super(); + } + public Duh(float f) { + super(5); + } + public Duh(String s) { + this(); + Duh.super.x; + } +} diff --git a/topics/grammars/java/antlr-java-1.3/tests/D.java b/topics/grammars/java/antlr-java-1.3/tests/D.java new file mode 100644 index 00000000..535b2847 --- /dev/null +++ b/topics/grammars/java/antlr-java-1.3/tests/D.java @@ -0,0 +1,32 @@ +class D { + int f; +} +class C { + D d=new D(); + C(int x) { this(); } + D getD() { return d; } + C x = this; +} +class B { + class H1 extends C { + H1() 
{ + super(); + } + C x = super; + } + int getC() { return new C().getD().f; } +} +class A0 { + void x() { + new B().getC().f; + } + class subH1 extends B.H1 { + subH1() { + new B().super(); + } + subH1(B enclosingInstance) { + enclosingInstance.super(); + enclosingInstance.getC(); + } + } +} diff --git a/topics/grammars/java/antlr-java-1.3/tests/E.java b/topics/grammars/java/antlr-java-1.3/tests/E.java new file mode 100644 index 00000000..f5bea784 --- /dev/null +++ b/topics/grammars/java/antlr-java-1.3/tests/E.java @@ -0,0 +1,24 @@ +public class E { + public static void main(String[] args) { + new ChildOfInner(); + } +} + +class Outer { + public Outer () { + System.out.println("Outer()"); + } + class Inner { + public Inner() { + System.out.println("Inner()"); + } + } +} + +class ChildOfInner extends Outer.Inner { + ChildOfInner() { + (new Outer()).super(); + // super(); --> makes no sense here; no enclosing Outer instance + System.out.println("ChildOfInner()"); + } +} diff --git a/topics/grammars/java/antlr-java-1.3/tests/T.java b/topics/grammars/java/antlr-java-1.3/tests/T.java new file mode 100644 index 00000000..2341147b --- /dev/null +++ b/topics/grammars/java/antlr-java-1.3/tests/T.java @@ -0,0 +1,22 @@ +class T { + static { int i; } + + public T() { + this( (int) (r * 255), (int) (g * 255)); + } + + void foo() { + Class c = (int.class); + final class U { int i; } + Class c = int[].class; + Class d = Object[].class; + t.new T(); + ((T)t).method(); + return "[i=" + (value) + "]"; + int q = (int)+3; + int z = (int)4; + int y = (z)+5; + String s = (String) "ff"; + String t = (s)+"blort"; + } +} diff --git a/topics/grammars/java/antlr-java-1.4/README.txt b/topics/grammars/java/antlr-java-1.4/README.txt new file mode 100644 index 00000000..c39bc641 --- /dev/null +++ b/topics/grammars/java/antlr-java-1.4/README.txt @@ -0,0 +1,7 @@ +http://www.antlr.org/grammar/list: + ↳ http://www.thecortex.net/clover/generics/javaG.g (expired domain) + ↳ 
http://replay.waybackmachine.org/20040313112346/http://www.thecortex.net/clover/generics/javaG.g + +Java 1.4 grammar update +Terence Parr (submitter) Mon Mar 1, 2004 11:41 +Cortex eBusiness, makers of Clover code coverage tool for java, have provided this update on their site. :) diff --git a/topics/grammars/java/antlr-java-1.4/javaG.g b/topics/grammars/java/antlr-java-1.4/javaG.g new file mode 100644 index 00000000..725206c3 --- /dev/null +++ b/topics/grammars/java/antlr-java-1.4/javaG.g @@ -0,0 +1,1364 @@ +/** Java 1.5/JSR14 Recognizer + * + * Run 'java Main [-showtree] directory-full-of-java-files' + * + * [The -showtree option pops up a Swing frame that shows + * the AST constructed from the parser.] + * + * Run 'java Main <directory full of java files>' + * + * Contributing authors: + * John Mitchell johnm@non.net + * Terence Parr parrt@magelang.com + * John Lilley jlilley@empathy.com + * Scott Stanchfield thetick@magelang.com + * Markus Mohnen mohnen@informatik.rwth-aachen.de + * Peter Williams pete.williams@sun.com + * Allan Jacobs Allan.Jacobs@eng.sun.com + * Steve Messick messick@redhills.com + * John Pybus john@pybus.org + * + * Version 1.00 December 9, 1997 -- initial release + * Version 1.01 December 10, 1997 + * fixed bug in octal def (0..7 not 0..8) + * Version 1.10 August 1998 (parrt) + * added tree construction + * fixed definition of WS,comments for mac,pc,unix newlines + * added unary plus + * Version 1.11 (Nov 20, 1998) + * Added "shutup" option to turn off last ambig warning. + * Fixed inner class def to allow named class defs as statements + * synchronized requires compound not simple statement + * add [] after builtInType DOT class in primaryExpression + * "const" is reserved but not valid..removed from modifiers + * Version 1.12 (Feb 2, 1999) + * Changed LITERAL_xxx to xxx in tree grammar. + * Updated java.g to use tokens {...} now for 2.6.0 (new feature). + * + * Version 1.13 (Apr 23, 1999) + * Didn't have (stat)? for else clause in tree parser.
+ * Didn't gen ASTs for interface extends. Updated tree parser too. + * Updated to 2.6.0. + * Version 1.14 (Jun 20, 1999) + * Allowed final/abstract on local classes. + * Removed local interfaces from methods + * Put instanceof precedence where it belongs...in relationalExpr + * It also had expr not type as arg; fixed it. + * Missing ! on SEMI in classBlock + * fixed: (expr) + "string" was parsed incorrectly (+ as unary plus). + * fixed: didn't like Object[].class in parser or tree parser + * Version 1.15 (Jun 26, 1999) + * Screwed up rule with instanceof in it. :( Fixed. + * Tree parser didn't like (expr).something; fixed. + * Allowed multiple inheritance in tree grammar. oops. + * Version 1.16 (August 22, 1999) + * Extending an interface built a wacky tree: had extra EXTENDS. + * Tree grammar didn't allow multiple superinterfaces. + * Tree grammar didn't allow empty var initializer: {} + * Version 1.17 (October 12, 1999) + * ESC lexer rule allowed 399 max not 377 max. + * java.tree.g didn't handle the expression of synchronized + * statements. + * Version 1.18 (August 12, 2001) + * Terence updated to Java 2 Version 1.3 by + * observing/combining work of Allan Jacobs and Steve + * Messick. Handles 1.3 src. Summary: + * o primary didn't include boolean.class kind of thing + * o constructor calls parsed explicitly now: + * see explicitConstructorInvocation + * o add strictfp modifier + * o missing objBlock after new expression in tree grammar + * o merged local class definition alternatives, moved after declaration + * o fixed problem with ClassName.super.field + * o reordered some alternatives to make things more efficient + * o long and double constants were not differentiated from int/float + * o whitespace rule was inefficient: matched only one char + * o add an examples directory with some nasty 1.3 cases + * o made Main.java use buffered IO and a Reader for Unicode support + * o supports UNICODE? 
+ * Using Unicode charVocabulary makes code file big, but only + * in the bitsets at the end. I need to make ANTLR generate + * unicode bitsets more efficiently. + * Version 1.19 (April 25, 2002) + * Terence added in nice fixes by John Pybus concerning floating + * constants and problems with super() calls. John did a nice + * reorg of the primary/postfix expression stuff to read better + * and makes f.g.super() parse properly (it was METHOD_CALL not + * a SUPER_CTOR_CALL). Also: + * + * o "finally" clause was a root...made it a child of "try" + * o Added stuff for asserts too for Java 1.4, but *commented out* + * as it is not backward compatible. + * + * Version 1.20 (October 27, 2002) + * + * Terence ended up reorging John Pybus' stuff to + * remove some nondeterminisms and some syntactic predicates. + * Note that the grammar is stricter now; e.g., this(...) must + * be the first statement. + * + * Trinary ?: operator wasn't working as array name: + * (isBig ? bigDigits : digits)[i]; + * + * Checked parser/tree parser on source for + * Resin-2.0.5, jive-2.1.1, jdk 1.3.1, Lucene, antlr 2.7.2a4, + * and the 110k-line jGuru server source. + * + * Version 1.21.2 (March, 2003) + * Changes by Matt Quail to support generics (as per JDK1.5/JSR14) + * Notes: + * o We only allow the "extends" keyword and not the "implements" + * keyword, since that's what JSR14 seems to imply. + * o Thanks to Monty Zukowski for his help on the antlr-interest + * mail list.
+ * o Thanks to Alan Eliasen for testing the grammar over his + * Fink source base + * + * + * This grammar is in the PUBLIC DOMAIN + */ +class JavaRecognizer extends Parser; +options { + k = 2; // two token lookahead + exportVocab=Java; // Call its vocabulary "Java" + codeGenMakeSwitchThreshold = 2; // Some optimizations + codeGenBitsetTestThreshold = 3; + defaultErrorHandler = false; // Don't generate parser error handlers + buildAST = true; +} + +tokens { + BLOCK; MODIFIERS; OBJBLOCK; SLIST; CTOR_DEF; METHOD_DEF; VARIABLE_DEF; + INSTANCE_INIT; STATIC_INIT; TYPE; CLASS_DEF; INTERFACE_DEF; + PACKAGE_DEF; ARRAY_DECLARATOR; EXTENDS_CLAUSE; IMPLEMENTS_CLAUSE; + PARAMETERS; PARAMETER_DEF; LABELED_STAT; TYPECAST; INDEX_OP; + POST_INC; POST_DEC; METHOD_CALL; EXPR; ARRAY_INIT; + IMPORT; UNARY_MINUS; UNARY_PLUS; CASE_GROUP; ELIST; FOR_INIT; FOR_CONDITION; + FOR_ITERATOR; EMPTY_STAT; FINAL="final"; ABSTRACT="abstract"; + STRICTFP="strictfp"; SUPER_CTOR_CALL; CTOR_CALL; +} + +{ + /** + * Counts the number of LT seen in the typeArguments production. + * It is used in semantic predicates to ensure we have seen + * enough closing '>' characters; which actually may have been + * either GT, SR or BSR tokens. + */ + private int ltCounter = 0; + +} +// Compilation Unit: In Java, this is a single file. This is the start +// rule for this parser +compilationUnit + : // A compilation unit starts with an optional package definition + ( packageDefinition + | /* nothing */ + ) + + // Next we have a series of zero or more import statements + ( importDefinition )* + + // Wrapping things up with any number of class or interface + // definitions + ( typeDefinition )* + + EOF! + ; + + +// Package statement: "package" followed by an identifier. +packageDefinition + options {defaultErrorHandler = true;} // let ANTLR handle errors + : p:"package"^ {#p.setType(PACKAGE_DEF);} identifier SEMI! 
+ ; + + +// Import statement: import followed by a package or class name +importDefinition + options {defaultErrorHandler = true;} + : i:"import"^ {#i.setType(IMPORT);} identifierStar SEMI! + ; + +// A type definition in a file is either a class or interface definition. +typeDefinition + options {defaultErrorHandler = true;} + : m:modifiers! + ( classDefinition[#m] + | interfaceDefinition[#m] + ) + | SEMI! + ; + +/** A declaration is the creation of a reference or primitive-type variable + * Create a separate Type/Var tree for each var in the var list. + */ +declaration! + : m:modifiers t:typeSpec[false] v:variableDefinitions[#m,#t] + {#declaration = #v;} + ; + +// A type specification is a type name with possible brackets afterwards +// (which would make it an array type). +typeSpec[boolean addImagNode] + : classTypeSpec[addImagNode] + | builtInTypeSpec[addImagNode] + ; + +arraySpecOpt: + (options{greedy=true;}: // match as many as possible + lb:LBRACK^ {#lb.setType(ARRAY_DECLARATOR);} RBRACK! + )* + ; + +// A class type specification is a class type with either: +// - possible brackets afterwards +// (which would make it an array type). +// - generic type arguments after +classTypeSpec[boolean addImagNode] + : classOrInterfaceType[addImagNode] + arraySpecOpt + { + if ( addImagNode ) { + #classTypeSpec = #(#[TYPE,"TYPE"], #classTypeSpec); + } + } + ; +classOrInterfaceType[boolean addImagNode] + : IDENT (typeArguments[addImagNode])? + (options{greedy=true;}: // match as many as possible + DOT + IDENT (typeArguments[addImagNode])? + )* + ; +typeArguments[boolean addImagNode] +{int currentLtLevel = 0;} + : + {currentLtLevel = ltCounter;} + LT {ltCounter++;} + classTypeSpec[addImagNode] + (options{greedy=true;}: // match as many as possible + COMMA classTypeSpec [addImagNode] + )* + + ( // turn warning off since Antlr generates the right code, + // plus we have our semantic predicate below + options{generateAmbigWarnings=false;}: + typeArgumentsEnd + )? 
+ + // make sure we have gobbled up enough '>' characters + // if we are at the "top level" of nested typeArgument productions + {(currentLtLevel != 0) || ltCounter == currentLtLevel}? + ; + +// this gobbles up *some* amount of '>' characters, and counts how many +// it gobbled by decrementing ltCounter (GT by 1, SR by 2, BSR by 3). +protected typeArgumentsEnd: + GT {ltCounter-=1;} + | SR {ltCounter-=2;} + | BSR {ltCounter-=3;} + ; + +// A builtin type specification is a builtin type with possible brackets +// afterwards (which would make it an array type). +builtInTypeSpec[boolean addImagNode] + : builtInType arraySpecOpt + { + if ( addImagNode ) { + #builtInTypeSpec = #(#[TYPE,"TYPE"], #builtInTypeSpec); + } + } + ; + +// A type name, which is either a (possibly qualified and parameterized) +// class name or a primitive (builtin) type +type + : classOrInterfaceType[false] + | builtInType + ; + +// The primitive types. +builtInType + : "void" + | "boolean" + | "byte" + | "char" + | "short" + | "int" + | "float" + | "long" + | "double" + ; + +// A (possibly-qualified) java identifier. We start with the first IDENT +// and expand its name by adding dots and following IDENTS +identifier + : IDENT ( DOT^ IDENT )* + ; + +identifierStar + : IDENT + ( DOT^ IDENT )* + ( DOT^ STAR )? + ; + +// A list of zero or more modifiers.
We could have used (modifier)* in +// place of a call to modifiers, but I thought it was a good idea to keep +// this rule separate so they can easily be collected in a Vector if +// someone so desires +modifiers + : ( modifier )* + {#modifiers = #([MODIFIERS, "MODIFIERS"], #modifiers);} + ; + +// modifiers for Java classes, interfaces, class/instance vars and methods +modifier + : "private" + | "public" + | "protected" + | "static" + | "transient" + | "final" + | "abstract" + | "native" + | "threadsafe" + | "synchronized" +// | "const" // reserved word, but not valid + | "volatile" + | "strictfp" + ; + +// Definition of a Java class +classDefinition![AST modifiers] + : "class" IDENT + // it _might_ have type parameters + (typeParameters)? + // it _might_ have a superclass... + sc:superClassClause + // it might implement some interfaces... + ic:implementsClause + // now parse the body of the class + cb:classBlock + {#classDefinition = #(#[CLASS_DEF,"CLASS_DEF"], + modifiers,IDENT,sc,ic,cb);} + ; + +superClassClause! + : ( "extends" classOrInterfaceType[false] )? + // NOTE(review): the rule is marked '!' and the tree-building action below + // is commented out, so this rule presumably yields no EXTENDS_CLAUSE node + // for the 'sc' slot of CLASS_DEF -- confirm before relying on the tree. + //{#superClassClause = #(#[EXTENDS_CLAUSE,"EXTENDS_CLAUSE"],id);} + ; + +// Definition of a Java Interface +interfaceDefinition![AST modifiers] + : "interface" IDENT + // it _might_ have type parameters + (typeParameters)? + // it might extend some other interfaces + ie:interfaceExtends + // now parse the body of the interface (looks like a class...) + cb:classBlock + {#interfaceDefinition = #(#[INTERFACE_DEF,"INTERFACE_DEF"], + modifiers,IDENT,ie,cb);} + ; + +typeParameters +{int currentLtLevel = 0;} + : + {currentLtLevel = ltCounter;} + LT {ltCounter++;} + typeParameter (COMMA typeParameter)* + (typeArgumentsEnd)? + // make sure we have gobbled up enough '>' characters + // if we are at the "top level" of nested typeArgument productions + {(currentLtLevel != 0) || ltCounter == currentLtLevel}?
+ ; + +typeParameter: + IDENT + ( // I'm pretty sure Antlr generates the right thing here: + options{generateAmbigWarnings=false;}: + "extends" classOrInterfaceType[false] + (BAND classOrInterfaceType[false])* + )? + ; +// This is the body of a class. You can have fields and extra semicolons, +// That's about it (until you see what a field is...) +classBlock + : LCURLY! + ( field | SEMI! )* + RCURLY! + {#classBlock = #([OBJBLOCK, "OBJBLOCK"], #classBlock);} + ; + +// An interface can extend several other interfaces... +interfaceExtends + : ( + e:"extends"! + classOrInterfaceType[false] ( COMMA! classOrInterfaceType[false] )* + )? + {#interfaceExtends = #(#[EXTENDS_CLAUSE,"EXTENDS_CLAUSE"], + #interfaceExtends);} + ; + +// A class can implement several interfaces... +implementsClause + : ( + i:"implements"! classOrInterfaceType[false] ( COMMA! classOrInterfaceType[false] )* + )? + {#implementsClause = #(#[IMPLEMENTS_CLAUSE,"IMPLEMENTS_CLAUSE"], + #implementsClause);} + ; + +// Now the various things that can be defined inside a class or interface... +// Note that not all of these are really valid in an interface (constructors, +// for example), and if this grammar were used for a compiler there would +// need to be some semantic checks to make sure we're doing the right thing... +field! + : // method, constructor, or variable declaration + mods:modifiers + ( h:ctorHead s:constructorBody // constructor + {#field = #(#[CTOR_DEF,"CTOR_DEF"], mods, h, s);} + + | cd:classDefinition[#mods] // inner class + {#field = #cd;} + + | id:interfaceDefinition[#mods] // inner interface + {#field = #id;} + + | // A generic method has the typeParameters before the return type. + // This is not allowed for variable definitions, but this production + // allows it, a semantic check could be used if you wanted. + (typeParameters)? t:typeSpec[false] // method or variable declaration(s) + ( IDENT // the name of the method + + // parse the formal parameter declarations. + LPAREN! 
param:parameterDeclarationList RPAREN! + + rt:declaratorBrackets[#t] + + // get the list of exceptions that this method is + // declared to throw + (tc:throwsClause)? + + ( s2:compoundStatement | SEMI ) + {#field = #(#[METHOD_DEF,"METHOD_DEF"], + mods, + #(#[TYPE,"TYPE"],rt), + IDENT, + param, + tc, + s2);} + | v:variableDefinitions[#mods,#t] SEMI +// {#field = #(#[VARIABLE_DEF,"VARIABLE_DEF"], v);} + {#field = #v;} + ) + ) + + // "static { ... }" class initializer + | "static" s3:compoundStatement + {#field = #(#[STATIC_INIT,"STATIC_INIT"], s3);} + + // "{ ... }" instance initializer + | s4:compoundStatement + {#field = #(#[INSTANCE_INIT,"INSTANCE_INIT"], s4);} + ; + +constructorBody + : lc:LCURLY^ {#lc.setType(SLIST);} + ( options { greedy=true; } : explicitConstructorInvocation)? + (statement)* + RCURLY! + ; + +/** Catch obvious constructor calls, but not the expr.super(...) calls */ +explicitConstructorInvocation + : "this"! lp1:LPAREN^ argList RPAREN! SEMI! + {#lp1.setType(CTOR_CALL);} + | "super"! lp2:LPAREN^ argList RPAREN! SEMI! + {#lp2.setType(SUPER_CTOR_CALL);} + ; + +variableDefinitions[AST mods, AST t] + : variableDeclarator[getASTFactory().dupTree(mods), + getASTFactory().dupTree(t)] + ( COMMA! + variableDeclarator[getASTFactory().dupTree(mods), + getASTFactory().dupTree(t)] + )* + ; + +/** Declaration of a variable. This can be a class/instance variable, + * or a local variable in a method + * It can also include possible initialization. + */ +variableDeclarator![AST mods, AST t] + : id:IDENT d:declaratorBrackets[t] v:varInitializer + {#variableDeclarator = #(#[VARIABLE_DEF,"VARIABLE_DEF"], mods, #(#[TYPE,"TYPE"],d), id, v);} + ; + +declaratorBrackets[AST typ] + : {#declaratorBrackets=typ;} + (lb:LBRACK^ {#lb.setType(ARRAY_DECLARATOR);} RBRACK!)* + ; + +varInitializer + : ( ASSIGN^ initializer )? + ; + +// This is an initializer used to set up an array. 
+arrayInitializer + : lc:LCURLY^ {#lc.setType(ARRAY_INIT);} + ( initializer + ( + // CONFLICT: does a COMMA after an initializer start a new + // initializer or start the option ',' at end? + // ANTLR generates proper code by matching + // the comma as soon as possible. + options { + warnWhenFollowAmbig = false; + } + : + COMMA! initializer + )* + (COMMA!)? + )? + RCURLY! + ; + + +// The two "things" that can initialize an array element are an expression +// and another (nested) array initializer. +initializer + : expression + | arrayInitializer + ; + +// This is the header of a method. It includes the name and parameters +// for the method. +// This also watches for a list of exception classes in a "throws" clause. +ctorHead + : IDENT // the name of the method + + // parse the formal parameter declarations. + LPAREN! parameterDeclarationList RPAREN! + + // get the list of exceptions that this method is declared to throw + (throwsClause)? + ; + +// This is a list of exception classes that the method is declared to throw +throwsClause + : "throws"^ identifier ( COMMA! identifier )* + ; + + +// A list of formal parameters +parameterDeclarationList + : ( parameterDeclaration ( COMMA! parameterDeclaration )* )? + {#parameterDeclarationList = #(#[PARAMETERS,"PARAMETERS"], + #parameterDeclarationList);} + ; + +// A formal parameter. +parameterDeclaration! + : pm:parameterModifier t:typeSpec[false] id:IDENT + pd:declaratorBrackets[#t] + {#parameterDeclaration = #(#[PARAMETER_DEF,"PARAMETER_DEF"], + pm, #([TYPE,"TYPE"],pd), id);} + ; + +parameterModifier + : (f:"final")? + {#parameterModifier = #(#[MODIFIERS,"MODIFIERS"], f);} + ; + +// Compound statement. 
This is used in many contexts: +// Inside a class definition prefixed with "static": +// it is a class initializer +// Inside a class definition without "static": +// it is an instance initializer +// As the body of a method +// As a completely independent braced block of code inside a method +// it starts a new scope for variable definitions + +compoundStatement + : lc:LCURLY^ {#lc.setType(SLIST);} + // include the (possibly-empty) list of statements + (statement)* + RCURLY! + ; + + +statement + // A list of statements in curly braces -- start a new scope! + : compoundStatement + + // declarations are ambiguous with "ID DOT" relative to expression + // statements. Must backtrack to be sure. Could use a semantic + // predicate to test symbol table to see what the type was coming + // up, but that's pretty hard without a symbol table ;) + | (declaration)=> declaration SEMI! + + // An expression statement. This could be a method call, + // assignment statement, or any other expression evaluated for + // side-effects. + | expression SEMI! + + // class definition + | m:modifiers! classDefinition[#m] + + // Attach a label to the front of a statement + | IDENT c:COLON^ {#c.setType(LABELED_STAT);} statement + + // If-else statement + | "if"^ LPAREN! expression RPAREN! statement + ( + // CONFLICT: the old "dangling-else" problem... + // ANTLR generates proper code matching + // as soon as possible. Hush warning. + options { + warnWhenFollowAmbig = false; + } + : + "else"! statement + )? + + // For statement + | "for"^ + LPAREN! + forInit SEMI! // initializer + forCond SEMI! // condition test + forIter // updater + RPAREN! + statement // statement to loop over + + // While statement + | "while"^ LPAREN! expression RPAREN! statement + + // do-while statement + | "do"^ statement "while"! LPAREN! expression RPAREN! SEMI! + + // get out of a loop (or switch) + | "break"^ (IDENT)? SEMI! + + // do next iteration of a loop + | "continue"^ (IDENT)? SEMI!
+ + // Return an expression + | "return"^ (expression)? SEMI! + + // switch/case statement + | "switch"^ LPAREN! expression RPAREN! LCURLY! + ( casesGroup )* + RCURLY! + + // exception try-catch block + | tryBlock + + // throw an exception + | "throw"^ expression SEMI! + + // synchronize a statement + | "synchronized"^ LPAREN! expression RPAREN! compoundStatement + + // asserts (uncomment if you want 1.4 compatibility) + //| "assert"^ expression ( COLON! expression )? SEMI! + + // empty statement + | s:SEMI {#s.setType(EMPTY_STAT);} + ; + +casesGroup + : ( // CONFLICT: to which case group do the statements bind? + // ANTLR generates proper code: it groups the + // many "case"/"default" labels together then + // follows them with the statements + options { + greedy = true; + } + : + aCase + )+ + caseSList + {#casesGroup = #([CASE_GROUP, "CASE_GROUP"], #casesGroup);} + ; + +aCase + : ("case"^ expression | "default") COLON! + ; + +caseSList + : (statement)* + {#caseSList = #(#[SLIST,"SLIST"],#caseSList);} + ; + +// The initializer for a for loop +forInit + // if it looks like a declaration, it is + : ( (declaration)=> declaration + // otherwise it could be an expression list... + | expressionList + )? + {#forInit = #(#[FOR_INIT,"FOR_INIT"],#forInit);} + ; + +forCond + : (expression)? + {#forCond = #(#[FOR_CONDITION,"FOR_CONDITION"],#forCond);} + ; + +forIter + : (expressionList)? + {#forIter = #(#[FOR_ITERATOR,"FOR_ITERATOR"],#forIter);} + ; + +// an exception handler try/catch block +tryBlock + : "try"^ compoundStatement + (handler)* + ( finallyClause )? + ; + +finallyClause + : "finally"^ compoundStatement + ; + +// an exception handler +handler + : "catch"^ LPAREN! parameterDeclaration RPAREN! 
compoundStatement + ; + + +// expressions +// Note that most of these expressions follow the pattern +// thisLevelExpression : +// nextHigherPrecedenceExpression +// (OPERATOR nextHigherPrecedenceExpression)* +// which is a standard recursive definition for parsing an expression. +// The operators in java have the following precedences: +// lowest (13) = *= /= %= += -= <<= >>= >>>= &= ^= |= +// (12) ?: +// (11) || +// (10) && +// ( 9) | +// ( 8) ^ +// ( 7) & +// ( 6) == != +// ( 5) < <= > >= +// ( 4) << >> >>> +// ( 3) +(binary) -(binary) +// ( 2) * / % +// ( 1) ++ -- +(unary) -(unary) ~ ! (type) +// [] () (method call) . (dot -- identifier qualification) +// new () (explicit parenthesis) +// +// the last two are not usually on a precedence chart; I put them in +// to point out that new has a higher precedence than '.', so you +// can validly use +// new Frame().show() +// +// Note that the above precedence levels map to the rules below... +// Once you have a precedence chart, writing the appropriate rules as below +// is usually very straightforward + + + +// the mother of all expressions +expression + : assignmentExpression + {#expression = #(#[EXPR,"EXPR"],#expression);} + ; + + +// This is a list of expressions. +expressionList + : expression (COMMA! expression)* + {#expressionList = #(#[ELIST,"ELIST"], expressionList);} + ; + + +// assignment expression (level 13) +assignmentExpression + : conditionalExpression + ( ( ASSIGN^ + | PLUS_ASSIGN^ + | MINUS_ASSIGN^ + | STAR_ASSIGN^ + | DIV_ASSIGN^ + | MOD_ASSIGN^ + | SR_ASSIGN^ + | BSR_ASSIGN^ + | SL_ASSIGN^ + | BAND_ASSIGN^ + | BXOR_ASSIGN^ + | BOR_ASSIGN^ + ) + assignmentExpression + )? + ; + + +// conditional test (level 12) +conditionalExpression + : logicalOrExpression + ( QUESTION^ assignmentExpression COLON! conditionalExpression )?
+ ; + + +// logical or (||) (level 11) +logicalOrExpression + : logicalAndExpression (LOR^ logicalAndExpression)* + ; + + +// logical and (&&) (level 10) +logicalAndExpression + : inclusiveOrExpression (LAND^ inclusiveOrExpression)* + ; + + +// bitwise or non-short-circuiting or (|) (level 9) +inclusiveOrExpression + : exclusiveOrExpression (BOR^ exclusiveOrExpression)* + ; + + +// exclusive or (^) (level 8) +exclusiveOrExpression + : andExpression (BXOR^ andExpression)* + ; + + +// bitwise or non-short-circuiting and (&) (level 7) +andExpression + : equalityExpression (BAND^ equalityExpression)* + ; + + +// equality/inequality (==/!=) (level 6) +equalityExpression + : relationalExpression ((NOT_EQUAL^ | EQUAL^) relationalExpression)* + ; + + +// boolean relational expressions (level 5) +relationalExpression + : shiftExpression + ( ( ( LT^ + | GT^ + | LE^ + | GE^ + ) + shiftExpression + )* + | "instanceof"^ typeSpec[true] + ) + ; + + +// bit shift expressions (level 4) +shiftExpression + : additiveExpression ((SL^ | SR^ | BSR^) additiveExpression)* + ; + + +// binary addition/subtraction (level 3) +additiveExpression + : multiplicativeExpression ((PLUS^ | MINUS^) multiplicativeExpression)* + ; + + +// multiplication/division/modulo (level 2) +multiplicativeExpression + : unaryExpression ((STAR^ | DIV^ | MOD^ ) unaryExpression)* + ; + +unaryExpression + : INC^ unaryExpression + | DEC^ unaryExpression + | MINUS^ {#MINUS.setType(UNARY_MINUS);} unaryExpression + | PLUS^ {#PLUS.setType(UNARY_PLUS);} unaryExpression + | unaryExpressionNotPlusMinus + ; + +unaryExpressionNotPlusMinus + : BNOT^ unaryExpression + | LNOT^ unaryExpression + + | ( // subrule allows option to shut off warnings + options { + // "(int" ambig with postfixExpr due to lack of sequence + // info in linear approximate LL(k). It's ok. Shut up. 
+ generateAmbigWarnings=false; + } + : // If typecast is built in type, must be numeric operand + // Also, no reason to backtrack if type keyword like int, float... + lpb:LPAREN^ {#lpb.setType(TYPECAST);} builtInTypeSpec[true] RPAREN! + unaryExpression + + // Have to backtrack to see if operator follows. If no operator + // follows, it's a typecast. No semantic checking needed to parse. + // if it _looks_ like a cast, it _is_ a cast; else it's a "(expr)" + | (LPAREN classTypeSpec[true] RPAREN unaryExpressionNotPlusMinus)=> + lp:LPAREN^ {#lp.setType(TYPECAST);} classTypeSpec[true] RPAREN! + unaryExpressionNotPlusMinus + + | postfixExpression + ) + ; + +// qualified names, array expressions, method invocation, post inc/dec +postfixExpression + : + /* + "this"! lp1:LPAREN^ argList RPAREN! + {#lp1.setType(CTOR_CALL);} + + | "super"! lp2:LPAREN^ argList RPAREN! + {#lp2.setType(SUPER_CTOR_CALL);} + | + */ + primaryExpression + + ( + /* + options { + // the use of postfixExpression in SUPER_CTOR_CALL adds DOT + // to the lookahead set, and gives loads of false non-det + // warnings. + // shut them off. + generateAmbigWarnings=false; + } + : */ + DOT^ IDENT + ( lp:LPAREN^ {#lp.setType(METHOD_CALL);} + argList + RPAREN! + )? + | DOT^ "this" + + | DOT^ "super" + ( // (new Outer()).super() (create enclosing instance) + lp3:LPAREN^ argList RPAREN! + {#lp3.setType(SUPER_CTOR_CALL);} + | DOT^ IDENT + ( lps:LPAREN^ {#lps.setType(METHOD_CALL);} + argList + RPAREN! + )? + ) + | DOT^ newExpression + | lb:LBRACK^ {#lb.setType(INDEX_OP);} expression RBRACK! + )* + + ( // possibly add on a post-increment or post-decrement. + // allows INC/DEC on too much, but semantics can check + in:INC^ {#in.setType(POST_INC);} + | de:DEC^ {#de.setType(POST_DEC);} + )? + ; + +// the basic element of an expression +primaryExpression + : identPrimary ( options {greedy=true;} : DOT^ "class" )? + | constant + | "true" + | "false" + | "null" + | newExpression + | "this" + | "super" + | LPAREN! 
assignmentExpression RPAREN! + // look for int.class and int[].class + | builtInType + ( lbt:LBRACK^ {#lbt.setType(ARRAY_DECLARATOR);} RBRACK! )* + DOT^ "class" + ; + +/** Match a, a.b.c refs, a.b.c(...) refs, a.b.c[], a.b.c[].class, + * and a.b.c.class refs. Also this(...) and super(...). Match + * this or super. + */ +identPrimary + : IDENT + ( + options { + // .ident could match here or in postfixExpression. + // We do want to match here. Turn off warning. + greedy=true; + } + : DOT^ IDENT + )* + ( + options { + // ARRAY_DECLARATOR here conflicts with INDEX_OP in + // postfixExpression on LBRACK RBRACK. + // We want to match [] here, so greedy. This overcomes + // limitation of linear approximate lookahead. + greedy=true; + } + : ( lp:LPAREN^ {#lp.setType(METHOD_CALL);} argList RPAREN! ) + | ( options {greedy=true;} : + lbc:LBRACK^ {#lbc.setType(ARRAY_DECLARATOR);} RBRACK! + )+ + )? + ; + +/** object instantiation. + * Trees are built as illustrated by the following input/tree pairs: + * + * new T() + * + * new + * | + * T -- ELIST + * | + * arg1 -- arg2 -- .. -- argn + * + * new int[] + * + * new + * | + * int -- ARRAY_DECLARATOR + * + * new int[] {1,2} + * + * new + * | + * int -- ARRAY_DECLARATOR -- ARRAY_INIT + * | + * EXPR -- EXPR + * | | + * 1 2 + * + * new int[3] + * new + * | + * int -- ARRAY_DECLARATOR + * | + * EXPR + * | + * 3 + * + * new int[1][2] + * + * new + * | + * int -- ARRAY_DECLARATOR + * | + * ARRAY_DECLARATOR -- EXPR + * | | + * EXPR 1 + * | + * 2 + * + */ +newExpression + : "new"^ type + ( LPAREN! argList RPAREN! (classBlock)? + + //java 1.1 + // Note: This will allow bad constructs like + // new int[4][][3] {exp,exp}. + // There needs to be a semantic check here... + // to make sure: + // a) [ expr ] and [ ] are not mixed + // b) [ expr ] and an init are not used together + + | newArrayDeclarator (arrayInitializer)? 
+ ) + ; + +argList + : ( expressionList + | /*nothing*/ + {#argList = #[ELIST,"ELIST"];} + ) + ; + +newArrayDeclarator + : ( + // CONFLICT: + // newExpression is a primaryExpression which can be + // followed by an array index reference. This is ok, + // as the generated code will stay in this loop as + // long as it sees an LBRACK (proper behavior) + options { + warnWhenFollowAmbig = false; + } + : + lb:LBRACK^ {#lb.setType(ARRAY_DECLARATOR);} + (expression)? + RBRACK! + )+ + ; + +constant + : NUM_INT + | CHAR_LITERAL + | STRING_LITERAL + | NUM_FLOAT + | NUM_LONG + | NUM_DOUBLE + ; + + +//---------------------------------------------------------------------------- +// The Java scanner +//---------------------------------------------------------------------------- +class JavaLexer extends Lexer; + +options { + exportVocab=Java; // call the vocabulary "Java" + testLiterals=false; // don't automatically test for literals + k=4; // four characters of lookahead + charVocabulary='\u0003'..'\uFFFF'; + // without inlining some bitset tests, couldn't do unicode; + // I need to make ANTLR generate smaller bitsets; see + // bottom of JavaLexer.java + codeGenBitsetTestThreshold=20; +} + + + +// OPERATORS +QUESTION : '?' ; +LPAREN : '(' ; +RPAREN : ')' ; +LBRACK : '[' ; +RBRACK : ']' ; +LCURLY : '{' ; +RCURLY : '}' ; +COLON : ':' ; +COMMA : ',' ; +//DOT : '.' ; +ASSIGN : '=' ; +EQUAL : "==" ; +LNOT : '!' 
; +BNOT : '~' ; +NOT_EQUAL : "!=" ; +DIV : '/' ; +DIV_ASSIGN : "/=" ; +PLUS : '+' ; +PLUS_ASSIGN : "+=" ; +INC : "++" ; +MINUS : '-' ; +MINUS_ASSIGN : "-=" ; +DEC : "--" ; +STAR : '*' ; +STAR_ASSIGN : "*=" ; +MOD : '%' ; +MOD_ASSIGN : "%=" ; +SR : ">>" ; +SR_ASSIGN : ">>=" ; +BSR : ">>>" ; +BSR_ASSIGN : ">>>=" ; +GE : ">=" ; +GT : ">" ; +SL : "<<" ; +SL_ASSIGN : "<<=" ; +LE : "<=" ; +LT : '<' ; +BXOR : '^' ; +BXOR_ASSIGN : "^=" ; +BOR : '|' ; +BOR_ASSIGN : "|=" ; +LOR : "||" ; +BAND : '&' ; +BAND_ASSIGN : "&=" ; +LAND : "&&" ; +SEMI : ';' ; + + +// Whitespace -- ignored +WS : ( ' ' + | '\t' + | '\f' + // handle newlines + | ( options {generateAmbigWarnings=false;} + : "\r\n" // Evil DOS + | '\r' // Macintosh + | '\n' // Unix (the right way) + ) + { newline(); } + )+ + { _ttype = Token.SKIP; } + ; + +// Single-line comments +SL_COMMENT + : "//" + (~('\n'|'\r'))* ('\n'|'\r'('\n')?) + {$setType(Token.SKIP); newline();} + ; + +// multiple-line comments +ML_COMMENT + : "/*" + ( /* '\r' '\n' can be matched in one alternative or by matching + '\r' in one iteration and '\n' in another. I am trying to + handle any flavor of newline that comes in, but the language + that allows both "\r\n" and "\r" and "\n" to all be valid + newline is ambiguous. Consequently, the resulting grammar + must be ambiguous. I'm shutting this warning off. + */ + options { + generateAmbigWarnings=false; + } + : + { LA(2)!='/' }? '*' + | '\r' '\n' {newline();} + | '\r' {newline();} + | '\n' {newline();} + | ~('*'|'\n'|'\r') + )* + "*/" + {$setType(Token.SKIP);} + ; + + +// character literals: exactly one escape sequence or one ordinary character. +// A raw quote, backslash, or line terminator is not an ordinary character, +// so a backslash can only start an ESC and a literal cannot span lines. +CHAR_LITERAL + : '\'' ( ESC | ~('\''|'\n'|'\r'|'\\') ) '\'' + ; + +// string literals: raw line terminators are excluded, so an unterminated +// string fails on its own line instead of swallowing the following lines +// (this rule never calls newline(), so line numbers would drift otherwise). +STRING_LITERAL + : '"' (ESC|~('"'|'\\'|'\n'|'\r'))* '"' + ; + + +// escape sequence -- note that this is protected; it can only be called +// from another lexer rule -- it will not ever directly return a token to +// the parser +// There are various ambiguities hushed in this rule.
The optional +// '0'..'7' digit matches should be matched here rather than letting +// them go back to STRING_LITERAL to be matched. ANTLR does the +// right thing by matching immediately; hence, it's ok to shut off +// the FOLLOW ambig warnings. +protected +ESC + : '\\' + ( 'n' + | 'r' + | 't' + | 'b' + | 'f' + | '"' + | '\'' + | '\\' + | ('u')+ HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT + | '0'..'3' + ( + options { + warnWhenFollowAmbig = false; + } + : '0'..'7' + ( + options { + warnWhenFollowAmbig = false; + } + : '0'..'7' + )? + )? + | '4'..'7' + ( + options { + warnWhenFollowAmbig = false; + } + : '0'..'7' + )? + ) + ; + + +// hexadecimal digit (again, note it's protected!) +protected +HEX_DIGIT + : ('0'..'9'|'A'..'F'|'a'..'f') + ; + + +// a dummy rule to force vocabulary to be all characters (except special +// ones that ANTLR uses internally (0 to 2)) +protected +VOCAB + : '\3'..'\377' + ; + + +// an identifier. Note that testLiterals is set to true! This means +// that after we match the rule, we look in the literals table to see +// if it's a literal or really an identifier +IDENT + options {testLiterals=true;} + : ('a'..'z'|'A'..'Z'|'_'|'$') ('a'..'z'|'A'..'Z'|'_'|'0'..'9'|'$')* + ; + + +// a numeric literal +NUM_INT + {boolean isDecimal=false; Token t=null;} + : '.' {_ttype = DOT;} + ( ('0'..'9')+ (EXPONENT)? (f1:FLOAT_SUFFIX {t=f1;})? + { + if (t != null && t.getText().toUpperCase().indexOf('F')>=0) { + _ttype = NUM_FLOAT; + } + else { + _ttype = NUM_DOUBLE; // assume double + } + } + )? + + | ( '0' {isDecimal = true;} // special case for just '0' + ( ('x'|'X') + ( // hex + // the 'e'|'E' and float suffix stuff look + // like hex digits, hence the (...)+ doesn't + // know when to stop: ambig. ANTLR resolves + // it correctly by matching immediately. It + // is therefore ok to hush warning. + options { + warnWhenFollowAmbig=false; + } + : HEX_DIGIT + )+ + | ('0'..'7')+ // octal + )?
+ | ('1'..'9') ('0'..'9')* {isDecimal=true;} // non-zero decimal + ) + ( ('l'|'L') { _ttype = NUM_LONG; } + + // only check to see if it's a float if looks like decimal so far + | {isDecimal}? + ( '.' ('0'..'9')* (EXPONENT)? (f2:FLOAT_SUFFIX {t=f2;})? + | EXPONENT (f3:FLOAT_SUFFIX {t=f3;})? + | f4:FLOAT_SUFFIX {t=f4;} + ) + { + if (t != null && t.getText().toUpperCase() .indexOf('F') >= 0) { + _ttype = NUM_FLOAT; + } + else { + _ttype = NUM_DOUBLE; // assume double + } + } + )? + ; + + +// a couple protected methods to assist in matching floating point numbers +protected +EXPONENT + : ('e'|'E') ('+'|'-')? ('0'..'9')+ + ; + + +protected +FLOAT_SUFFIX + : 'f'|'F'|'d'|'D' + ; + + diff --git a/topics/grammars/java/antlr-java-6/Java.g b/topics/grammars/java/antlr-java-6/Java.g new file mode 100644 index 00000000..b15b9433 --- /dev/null +++ b/topics/grammars/java/antlr-java-6/Java.g @@ -0,0 +1,2507 @@ +/* + [The "BSD licence"] + Copyright (c) 2007-2008 Terence Parr + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* + * This file is modified by Yang Jiang (yang.jiang.z@gmail.com), taken from the original + * java grammar in www.antlr.org, with the goal to provide a standard ANTLR grammar + * for java, as well as an implementation to construct the same AST trees as javac does. + * + * The major changes of this version as compared to the original version include: + * 1) Top level rules are changed to include all of their sub-components. + * For example, the rule + * + * classOrInterfaceDeclaration + * : classOrInterfaceModifiers (classDeclaration | interfaceDeclaration) + * ; + * + * is changed to + * + * classOrInterfaceDeclaration + * : classDeclaration | interfaceDeclaration + * ; + * + * with classOrInterfaceModifiers been moved inside classDeclaration and + * interfaceDeclaration. + * + * 2) The original version is not quite clear on certain rules like memberDecl, + * where it mixed the styles of listing of top level rules and listing of sub rules. + * + * memberDecl + * : genericMethodOrConstructorDecl + * | memberDeclaration + * | 'void' Identifier voidMethodDeclaratorRest + * | Identifier constructorDeclaratorRest + * | interfaceDeclaration + * | classDeclaration + * ; + * + * This is changed to a + * + * memberDecl + * : fieldDeclaration + * | methodDeclaration + * | classDeclaration + * | interfaceDeclaration + * ; + * by folding similar rules into single rule. 
+ * + * 3) Some syntactical predicates are added for efficiency, although this is not necessary + * for correctness. + * + * 4) Lexer part is rewritten completely to construct tokens needed for the parser. + * + * 5) This grammar adds more source level support + * + * + * This grammar also adds bug fixes. + * + * 1) Adding typeArguments to superSuffix to allow input like + * super.<TYPE>method() + * + * 2) Adding typeArguments to innerCreator to allow input like + * new Type1<String, Integer>().new Type2<String>() + * + * 3) conditionalExpression is changed to + * conditionalExpression + * : conditionalOrExpression ( '?' expression ':' conditionalExpression )? + * ; + * to accept input like + * true?1:2=3 + * + * Note: this is by no means a valid input, but the grammar should be able to parse + * this as + * (true?1:2)=3 + * rather than + * true?1:(2=3) + * + * + * Known problems: + * Won't pass input containing unicode sequence like this + * char c = '\uffff' + * String s = "\uffff"; + * Because Antlr does not treat '\uffff' as a valid char. This will be fixed in the next Antlr + * release. [Fixed in Antlr-3.1.1] + * + * Things to do: + * More effort to make this grammar faster. + * Error reporting/recovering. + * + * + * NOTE: If you try to compile this file from command line and Antlr gives an exception-like + * error message while compiling, add option + * -Xconversiontimeout 100000 + * to the command line. + * If it still doesn't work or the compilation process + * takes too long, try to comment out the following two lines: + * | {isValidSurrogateIdentifierStart((char)input.LT(1), (char)input.LT(2))}?=>('\ud800'..'\udbff') ('\udc00'..'\udfff') + * | {isValidSurrogateIdentifierPart((char)input.LT(1), (char)input.LT(2))}?=>('\ud800'..'\udbff') ('\udc00'..'\udfff') + * + * + * Below are comments found in the original version.
+ */ + + +/** A Java 1.5 grammar for ANTLR v3 derived from the spec + * + * This is a very close representation of the spec; the changes + * are cosmetic (remove left recursion) and also fixes (the spec + * isn't exactly perfect). I have run this on the 1.4.2 source + * and some nasty looking enums from 1.5, but have not really + * tested for 1.5 compatibility. + * + * I built this with: java -Xmx100M org.antlr.Tool java.g + * and got two errors that are ok (for now): + * java.g:691:9: Decision can match input such as + * "'0'..'9'{'E', 'e'}{'+', '-'}'0'..'9'{'D', 'F', 'd', 'f'}" + * using multiple alternatives: 3, 4 + * As a result, alternative(s) 4 were disabled for that input + * java.g:734:35: Decision can match input such as "{'$', 'A'..'Z', + * '_', 'a'..'z', '\u00C0'..'\u00D6', '\u00D8'..'\u00F6', + * '\u00F8'..'\u1FFF', '\u3040'..'\u318F', '\u3300'..'\u337F', + * '\u3400'..'\u3D2D', '\u4E00'..'\u9FFF', '\uF900'..'\uFAFF'}" + * using multiple alternatives: 1, 2 + * As a result, alternative(s) 2 were disabled for that input + * + * You can turn enum on/off as a keyword :) + * + * Version 1.0 -- initial release July 5, 2006 (requires 3.0b2 or higher) + * + * Primary author: Terence Parr, July 2006 + * + * Version 1.0.1 -- corrections by Koen Vanderkimpen & Marko van Dooren, + * October 25, 2006; + * fixed normalInterfaceDeclaration: now uses typeParameters instead + * of typeParameter (according to JLS, 3rd edition) + * fixed castExpression: no longer allows expression next to type + * (according to semantics in JLS, in contrast with syntax in JLS) + * + * Version 1.0.2 -- Terence Parr, Nov 27, 2006 + * java spec I built this from had some bizarre for-loop control. + * Looked weird and so I looked elsewhere...Yep, it's messed up. + * simplified. + * + * Version 1.0.3 -- Chris Hogue, Feb 26, 2007 + * Factored out an annotationName rule and used it in the annotation rule. + * Not sure why, but typeName wasn't recognizing references to inner + * annotations (e.g.
@InterfaceName.InnerAnnotation()) + * Factored out the elementValue section of an annotation reference. Created + * elementValuePair and elementValuePairs rules, then used them in the + * annotation rule. Allows it to recognize annotation references with + * multiple, comma separated attributes. + * Updated elementValueArrayInitializer so that it allows multiple elements. + * (It was only allowing 0 or 1 element). + * Updated localVariableDeclaration to allow annotations. Interestingly the JLS + * doesn't appear to indicate this is legal, but it does work as of at least + * JDK 1.5.0_06. + * Moved the Identifier portion of annotationTypeElementRest to annotationMethodRest. + * Because annotationConstantRest already references variableDeclarator which + * has the Identifier portion in it, the parser would fail on constants in + * annotation definitions because it expected two identifiers. + * Added optional trailing ';' to the alternatives in annotationTypeElementRest. + * Wouldn't handle an inner interface that has a trailing ';'. + * Swapped the expression and type rule reference order in castExpression to + * make it check for genericized casts first. It was failing to recognize a + * statement like "Class TYPE = (Class)...;" because it was seeing + * 'Class'. + * Changed createdName to use typeArguments instead of nonWildcardTypeArguments. + * + * Changed the 'this' alternative in primary to allow 'identifierSuffix' rather than + * just 'arguments'. The case it couldn't handle was a call to an explicit + * generic method invocation (e.g. this.doSomething()). Using identifierSuffix + * may be overly aggressive--perhaps should create a more constrained thisSuffix rule? + * + * Version 1.0.4 -- Hiroaki Nakamura, May 3, 2007 + * + * Fixed formalParameterDecls, localVariableDeclaration, forInit, + * and forVarControl to use variableModifier* not 'final'? (annotation)? + * + * Version 1.0.5 -- Terence, June 21, 2007 + * --a[i].foo didn't work. 
Fixed unaryExpression + * + * Version 1.0.6 -- John Ridgway, March 17, 2008 + * Made "assert" a switchable keyword like "enum". + * Fixed compilationUnit to disallow "annotation importDeclaration ...". + * Changed "Identifier ('.' Identifier)*" to "qualifiedName" in more + * places. + * Changed modifier* and/or variableModifier* to classOrInterfaceModifiers, + * modifiers or variableModifiers, as appropriate. + * Renamed "bound" to "typeBound" to better match language in the JLS. + * Added "memberDeclaration" which rewrites to methodDeclaration or + * fieldDeclaration and pulled type into memberDeclaration. So we parse + * type and then move on to decide whether we're dealing with a field + * or a method. + * Modified "constructorDeclaration" to use "constructorBody" instead of + * "methodBody". constructorBody starts with explicitConstructorInvocation, + * then goes on to blockStatement*. Pulling explicitConstructorInvocation + * out of expressions allowed me to simplify "primary". + * Changed variableDeclarator to simplify it. + * Changed type to use classOrInterfaceType, thus simplifying it; of course + * I then had to add classOrInterfaceType, but it is used in several + * places. + * Fixed annotations, old version allowed "@X(y,z)", which is illegal. + * Added optional comma to end of "elementValueArrayInitializer"; as per JLS. + * Changed annotationTypeElementRest to use normalClassDeclaration and + * normalInterfaceDeclaration rather than classDeclaration and + * interfaceDeclaration, thus getting rid of a couple of grammar ambiguities. + * Split localVariableDeclaration into localVariableDeclarationStatement + * (includes the terminating semi-colon) and localVariableDeclaration. + * This allowed me to use localVariableDeclaration in "forInit" clauses, + * simplifying them. + * Changed switchBlockStatementGroup to use multiple labels. 
This adds an + * ambiguity, but if one uses appropriately greedy parsing it yields the + * parse that is closest to the meaning of the switch statement. + * Renamed "forVarControl" to "enhancedForControl" -- JLS language. + * Added semantic predicates to test for shift operations rather than other + * things. Thus, for instance, the string "< <" will never be treated + * as a left-shift operator. + * In "creator" we rule out "nonWildcardTypeArguments" on arrayCreation, + * which are illegal. + * Moved "nonWildcardTypeArguments into innerCreator. + * Removed 'super' superSuffix from explicitGenericInvocation, since that + * is only used in explicitConstructorInvocation at the beginning of a + * constructorBody. (This is part of the simplification of expressions + * mentioned earlier.) + * Simplified primary (got rid of those things that are only used in + * explicitConstructorInvocation). + * Lexer -- removed "Exponent?" from FloatingPointLiteral choice 4, since it + * led to an ambiguity. + * + * This grammar successfully parses every .java file in the JDK 1.5 source + * tree (excluding those whose file names include '-', which are not + * valid Java compilation units). + * + * Known remaining problems: + * "Letter" and "JavaIDDigit" are wrong. The actual specification of + * "Letter" should be "a character for which the method + * Character.isJavaIdentifierStart(int) returns true." A "Java + * letter-or-digit is a character for which the method + * Character.isJavaIdentifierPart(int) returns true." + */ + + + /* + This is a merged file, containing two versions of the Java.g grammar. + To extract a version from the file, run the ver.jar with the command provided below. + + Version 1 - tree building version, with all source level support, error recovery etc. + This is the version for compiler grammar workspace. 
+ This version can be extracted by invoking: + java -cp ver.jar Main 1 true true true true true Java.g + + Version 2 - clean version, with no source level support, no error recovery, no predicts, + assumes 1.6 level, works in Antlrworks. + This is the version for Alex. + This version can be extracted by invoking: + java -cp ver.jar Main 2 false false false false false Java.g +*/ + +grammar Java; + + +options { + backtrack=true; + memoize=true; +} + +/******************************************************************************************** + Parser section +*********************************************************************************************/ + +compilationUnit + : ( (annotations + )? + packageDeclaration + )? + (importDeclaration + )* + (typeDeclaration + )* + ; + +packageDeclaration + : 'package' qualifiedName + ';' + ; + +importDeclaration + : 'import' + ('static' + )? + IDENTIFIER '.' '*' + ';' + | 'import' + ('static' + )? + IDENTIFIER + ('.' IDENTIFIER + )+ + ('.' '*' + )? + ';' + ; + +qualifiedImportName + : IDENTIFIER + ('.' IDENTIFIER + )* + ; + +typeDeclaration + : classOrInterfaceDeclaration + | ';' + ; + +classOrInterfaceDeclaration + : classDeclaration + | interfaceDeclaration + ; + + +modifiers + : + ( annotation + | 'public' + | 'protected' + | 'private' + | 'static' + | 'abstract' + | 'final' + | 'native' + | 'synchronized' + | 'transient' + | 'volatile' + | 'strictfp' + )* + ; + + +variableModifiers + : ( 'final' + | annotation + )* + ; + + +classDeclaration + : normalClassDeclaration + | enumDeclaration + ; + +normalClassDeclaration + : modifiers 'class' IDENTIFIER + (typeParameters + )? + ('extends' type + )? + ('implements' typeList + )? + classBody + ; + + +typeParameters + : '<' + typeParameter + (',' typeParameter + )* + '>' + ; + +typeParameter + : IDENTIFIER + ('extends' typeBound + )? + ; + + +typeBound + : type + ('&' type + )* + ; + + +enumDeclaration + : modifiers + ('enum' + ) + IDENTIFIER + ('implements' typeList + )?
+ enumBody + ; + + +enumBody + : '{' + (enumConstants + )? + ','? + (enumBodyDeclarations + )? + '}' + ; + +enumConstants + : enumConstant + (',' enumConstant + )* + ; + +/** + * NOTE: here differs from the javac grammar, missing TypeArguments. + * EnumeratorDeclaration = AnnotationsOpt [TypeArguments] IDENTIFIER [ Arguments ] [ "{" ClassBody "}" ] + */ +enumConstant + : (annotations + )? + IDENTIFIER + (arguments + )? + (classBody + )? + /* TODO: $GScope::name = names.empty. enum constant body is actually + an anonymous class, where constructor isn't allowed, have to add this check*/ + ; + +enumBodyDeclarations + : ';' + (classBodyDeclaration + )* + ; + +interfaceDeclaration + : normalInterfaceDeclaration + | annotationTypeDeclaration + ; + +normalInterfaceDeclaration + : modifiers 'interface' IDENTIFIER + (typeParameters + )? + ('extends' typeList + )? + interfaceBody + ; + +typeList + : type + (',' type + )* + ; + +classBody + : '{' + (classBodyDeclaration + )* + '}' + ; + +interfaceBody + : '{' + (interfaceBodyDeclaration + )* + '}' + ; + +classBodyDeclaration + : ';' + | ('static' + )? + block + | memberDecl + ; + +memberDecl + : fieldDeclaration + | methodDeclaration + | classDeclaration + | interfaceDeclaration + ; + + +methodDeclaration + : + /* For constructor, return type is null, name is 'init' */ + modifiers + (typeParameters + )? + IDENTIFIER + formalParameters + ('throws' qualifiedNameList + )? + '{' + (explicitConstructorInvocation + )? + (blockStatement + )* + '}' + | modifiers + (typeParameters + )? + (type + | 'void' + ) + IDENTIFIER + formalParameters + ('[' ']' + )* + ('throws' qualifiedNameList + )? + ( + block + | ';' + ) + ; + + +fieldDeclaration + : modifiers + type + variableDeclarator + (',' variableDeclarator + )* + ';' + ; + +variableDeclarator + : IDENTIFIER + ('[' ']' + )* + ('=' variableInitializer + )? 
+ ; + +/** + *TODO: add predicates + */ +interfaceBodyDeclaration + : + interfaceFieldDeclaration + | interfaceMethodDeclaration + | interfaceDeclaration + | classDeclaration + | ';' + ; + +interfaceMethodDeclaration + : modifiers + (typeParameters + )? + (type + |'void' + ) + IDENTIFIER + formalParameters + ('[' ']' + )* + ('throws' qualifiedNameList + )? ';' + ; + +/** + * NOTE, should not use variableDeclarator here, as it doesn't necessarily require + * an initializer, while an interface field does, or judge by the returned value. + * But this gives better diagnostic message, or antlr won't predict this rule. + */ +interfaceFieldDeclaration + : modifiers type variableDeclarator + (',' variableDeclarator + )* + ';' + ; + + +type + : classOrInterfaceType + ('[' ']' + )* + | primitiveType + ('[' ']' + )* + ; + + +classOrInterfaceType + : IDENTIFIER + (typeArguments + )? + ('.' IDENTIFIER + (typeArguments + )? + )* + ; + +primitiveType + : 'boolean' + | 'char' + | 'byte' + | 'short' + | 'int' + | 'long' + | 'float' + | 'double' + ; + +typeArguments + : '<' typeArgument + (',' typeArgument + )* + '>' + ; + +typeArgument + : type + | '?' + ( + ('extends' + |'super' + ) + type + )? + ; + +qualifiedNameList + : qualifiedName + (',' qualifiedName + )* + ; + +formalParameters + : '(' + (formalParameterDecls + )? + ')' + ; + +formalParameterDecls + : ellipsisParameterDecl + | normalParameterDecl + (',' normalParameterDecl + )* + | (normalParameterDecl + ',' + )+ + ellipsisParameterDecl + ; + +normalParameterDecl + : variableModifiers type IDENTIFIER + ('[' ']' + )* + ; + +ellipsisParameterDecl + : variableModifiers + type '...' + IDENTIFIER + ; + + +explicitConstructorInvocation + : (nonWildcardTypeArguments + )? //NOTE: the position of Identifier 'super' is set to the type args position here + ('this' + |'super' + ) + arguments ';' + + | primary + '.' + (nonWildcardTypeArguments + )? + 'super' + arguments ';' + ; + +qualifiedName + : IDENTIFIER + ('.' 
IDENTIFIER + )* + ; + +annotations + : (annotation + )+ + ; + +/** + * Using an annotation. + * '@' is flagged in modifier + */ +annotation + : '@' qualifiedName + ( '(' + ( elementValuePairs + | elementValue + )? + ')' + )? + ; + +elementValuePairs + : elementValuePair + (',' elementValuePair + )* + ; + +elementValuePair + : IDENTIFIER '=' elementValue + ; + +elementValue + : conditionalExpression + | annotation + | elementValueArrayInitializer + ; + +elementValueArrayInitializer + : '{' + (elementValue + (',' elementValue + )* + )? (',')? '}' + ; + + +/** + * Annotation declaration. + */ +annotationTypeDeclaration + : modifiers '@' + 'interface' + IDENTIFIER + annotationTypeBody + ; + + +annotationTypeBody + : '{' + (annotationTypeElementDeclaration + )* + '}' + ; + +/** + * NOTE: here use interfaceFieldDeclaration for field declared inside annotation. they are syntactically the same. + */ +annotationTypeElementDeclaration + : annotationMethodDeclaration + | interfaceFieldDeclaration + | normalClassDeclaration + | normalInterfaceDeclaration + | enumDeclaration + | annotationTypeDeclaration + | ';' + ; + +annotationMethodDeclaration + : modifiers type IDENTIFIER + '(' ')' ('default' elementValue + )? 
+ ';' + ; + +block + : '{' + (blockStatement + )* + '}' + ; + +/* +staticBlock returns [JCBlock tree] + @init { + ListBuffer stats = new ListBuffer(); + int pos = ((AntlrJavacToken) $start).getStartIndex(); + } + @after { + $tree = T.at(pos).Block(Flags.STATIC, stats.toList()); + pu.storeEnd($tree, $stop); + // construct a dummy static modifiers for end position + pu.storeEnd(T.at(pos).Modifiers(Flags.STATIC, com.sun.tools.javac.util.List.nil()),$st); + } + : st_1='static' '{' + (blockStatement + { + if ($blockStatement.tree == null) { + stats.appendList($blockStatement.list); + } else { + stats.append($blockStatement.tree); + } + } + )* '}' + ; +*/ +blockStatement + : localVariableDeclarationStatement + | classOrInterfaceDeclaration + | statement + ; + + +localVariableDeclarationStatement + : localVariableDeclaration + ';' + ; + +localVariableDeclaration + : variableModifiers type + variableDeclarator + (',' variableDeclarator + )* + ; + +statement + : block + + | ('assert' + ) + expression (':' expression)? ';' + | 'assert' expression (':' expression)? ';' + | 'if' parExpression statement ('else' statement)? + | forstatement + | 'while' parExpression statement + | 'do' statement 'while' parExpression ';' + | trystatement + | 'switch' parExpression '{' switchBlockStatementGroups '}' + | 'synchronized' parExpression block + | 'return' (expression )? ';' + | 'throw' expression ';' + | 'break' + (IDENTIFIER + )? ';' + | 'continue' + (IDENTIFIER + )? 
';' + | expression ';' + | IDENTIFIER ':' statement + | ';' + + ; + +switchBlockStatementGroups + : (switchBlockStatementGroup )* + ; + +switchBlockStatementGroup + : + switchLabel + (blockStatement + )* + ; + +switchLabel + : 'case' expression ':' + | 'default' ':' + ; + + +trystatement + : 'try' block + ( catches 'finally' block + | catches + | 'finally' block + ) + ; + +catches + : catchClause + (catchClause + )* + ; + +catchClause + : 'catch' '(' formalParameter + ')' block + ; + +formalParameter + : variableModifiers type IDENTIFIER + ('[' ']' + )* + ; + +forstatement + : + // enhanced for loop + 'for' '(' variableModifiers type IDENTIFIER ':' + expression ')' statement + + // normal for loop + | 'for' '(' + (forInit + )? ';' + (expression + )? ';' + (expressionList + )? ')' statement + ; + +forInit + : localVariableDeclaration + | expressionList + ; + +parExpression + : '(' expression ')' + ; + +expressionList + : expression + (',' expression + )* + ; + + +expression + : conditionalExpression + (assignmentOperator expression + )? + ; + + +assignmentOperator + : '=' + | '+=' + | '-=' + | '*=' + | '/=' + | '&=' + | '|=' + | '^=' + | '%=' + | '<' '<' '=' + | '>' '>' '>' '=' + | '>' '>' '=' + ; + + +conditionalExpression + : conditionalOrExpression + ('?' expression ':' conditionalExpression + )? + ; + +conditionalOrExpression + : conditionalAndExpression + ('||' conditionalAndExpression + )* + ; + +conditionalAndExpression + : inclusiveOrExpression + ('&&' inclusiveOrExpression + )* + ; + +inclusiveOrExpression + : exclusiveOrExpression + ('|' exclusiveOrExpression + )* + ; + +exclusiveOrExpression + : andExpression + ('^' andExpression + )* + ; + +andExpression + : equalityExpression + ('&' equalityExpression + )* + ; + +equalityExpression + : instanceOfExpression + ( + ( '==' + | '!=' + ) + instanceOfExpression + )* + ; + +instanceOfExpression + : relationalExpression + ('instanceof' type + )? 
+ ; + +relationalExpression + : shiftExpression + (relationalOp shiftExpression + )* + ; + +relationalOp + : '<' '=' + | '>' '=' + | '<' + | '>' + ; + +shiftExpression + : additiveExpression + (shiftOp additiveExpression + )* + ; + + +shiftOp + : '<' '<' + | '>' '>' '>' + | '>' '>' + ; + + +additiveExpression + : multiplicativeExpression + ( + ( '+' + | '-' + ) + multiplicativeExpression + )* + ; + +multiplicativeExpression + : + unaryExpression + ( + ( '*' + | '/' + | '%' + ) + unaryExpression + )* + ; + +/** + * NOTE: for '+' and '-', if the next token is int or long literal, then it's not a unary expression. + * it's a literal with signed value. INTLITERAL and LONGLITERAL are added here for this. + */ +unaryExpression + : '+' unaryExpression + | '-' unaryExpression + | '++' unaryExpression + | '--' unaryExpression + | unaryExpressionNotPlusMinus + ; + +unaryExpressionNotPlusMinus + : '~' unaryExpression + | '!' unaryExpression + | castExpression + | primary + (selector + )* + ( '++' + | '--' + )? + ; + +castExpression + : '(' primitiveType ')' unaryExpression + | '(' type ')' unaryExpressionNotPlusMinus + ; + +/** + * have to use scope here, parameter passing isn't well supported in antlr. + */ +primary + : parExpression + | 'this' + ('.' IDENTIFIER + )* + (identifierSuffix + )? + | IDENTIFIER + ('.' IDENTIFIER + )* + (identifierSuffix + )? + | 'super' + superSuffix + | literal + | creator + | primitiveType + ('[' ']' + )* + '.' 'class' + | 'void' '.' 'class' + ; + + +superSuffix + : arguments + | '.' (typeArguments + )? + IDENTIFIER + (arguments + )? + ; + + +identifierSuffix + : ('[' ']' + )+ + '.' 'class' + | ('[' expression ']' + )+ + | arguments + | '.' 'class' + | '.' nonWildcardTypeArguments IDENTIFIER arguments + | '.' 'this' + | '.' 'super' arguments + | innerCreator + ; + + +selector + : '.' IDENTIFIER + (arguments + )? + | '.' 'this' + | '.' 
'super' + superSuffix + | innerCreator + | '[' expression ']' + ; + +creator + : 'new' nonWildcardTypeArguments classOrInterfaceType classCreatorRest + | 'new' classOrInterfaceType classCreatorRest + | arrayCreator + ; + +arrayCreator + : 'new' createdName + '[' ']' + ('[' ']' + )* + arrayInitializer + + | 'new' createdName + '[' expression + ']' + ( '[' expression + ']' + )* + ('[' ']' + )* + ; + +variableInitializer + : arrayInitializer + | expression + ; + +arrayInitializer + : '{' + (variableInitializer + (',' variableInitializer + )* + )? + (',')? + '}' //Yang's fix, position change. + ; + + +createdName + : classOrInterfaceType + | primitiveType + ; + +innerCreator + : '.' 'new' + (nonWildcardTypeArguments + )? + IDENTIFIER + (typeArguments + )? + classCreatorRest + ; + + +classCreatorRest + : arguments + (classBody + )? + ; + + +nonWildcardTypeArguments + : '<' typeList + '>' + ; + +arguments + : '(' (expressionList + )? ')' + ; + +literal + : INTLITERAL + | LONGLITERAL + | FLOATLITERAL + | DOUBLELITERAL + | CHARLITERAL + | STRINGLITERAL + | TRUE + | FALSE + | NULL + ; + +/** + * These header rules help to build syntactic predicates; they are not necessary but help to make the grammar faster. + */ + +classHeader + : modifiers 'class' IDENTIFIER + ; + +enumHeader + : modifiers ('enum'|IDENTIFIER) IDENTIFIER + ; + +interfaceHeader + : modifiers 'interface' IDENTIFIER + ; + +annotationHeader + : modifiers '@' 'interface' IDENTIFIER + ; + +typeHeader + : modifiers ('class'|'enum'|('@' ? 'interface')) IDENTIFIER + ; + +methodHeader + : modifiers typeParameters? (type|'void')? 
IDENTIFIER '(' + ; + +fieldHeader + : modifiers type IDENTIFIER ('['']')* ('='|','|';') + ; + +localVariableHeader + : variableModifiers type IDENTIFIER ('['']')* ('='|','|';') + ; + + + + +/******************************************************************************************** + Lexer section +*********************************************************************************************/ + +LONGLITERAL + : IntegerNumber LongSuffix + ; + + +INTLITERAL + : IntegerNumber + ; + +fragment +IntegerNumber + : '0' + | '1'..'9' ('0'..'9')* + | '0' ('0'..'7')+ + | HexPrefix HexDigit+ + ; + +fragment +HexPrefix + : '0x' | '0X' + ; + +fragment +HexDigit + : ('0'..'9'|'a'..'f'|'A'..'F') + ; + +fragment +LongSuffix + : 'l' | 'L' + ; + + +fragment +NonIntegerNumber + : ('0' .. '9')+ '.' ('0' .. '9')* Exponent? + | '.' ( '0' .. '9' )+ Exponent? + | ('0' .. '9')+ Exponent + | ('0' .. '9')+ + | + HexPrefix (HexDigit )* + ( () + | ('.' (HexDigit )* ) + ) + ( 'p' | 'P' ) + ( '+' | '-' )? + ( '0' .. '9' )+ + ; + +fragment +Exponent + : ( 'e' | 'E' ) ( '+' | '-' )? ( '0' .. '9' )+ + ; + +fragment +FloatSuffix + : 'f' | 'F' + ; + +fragment +DoubleSuffix + : 'd' | 'D' + ; + +FLOATLITERAL + : NonIntegerNumber FloatSuffix + ; + +DOUBLELITERAL + : NonIntegerNumber DoubleSuffix? + ; + +CHARLITERAL + : '\'' + ( EscapeSequence + | ~( '\'' | '\\' | '\r' | '\n' ) + ) + '\'' + ; + +STRINGLITERAL + : '"' + ( EscapeSequence + | ~( '\\' | '"' | '\r' | '\n' ) + )* + '"' + ; + +fragment +EscapeSequence + : '\\' ( + 'b' + | 't' + | 'n' + | 'f' + | 'r' + | '\"' + | '\'' + | '\\' + | + ('0'..'3') ('0'..'7') ('0'..'7') + | + ('0'..'7') ('0'..'7') + | + ('0'..'7') + ) +; + +WS + : ( + ' ' + | '\r' + | '\t' + | '\u000C' + | '\n' + ) + { + skip(); + } + ; + +COMMENT + @init{ + boolean isJavaDoc = false; + } + : '/*' + { + if((char)input.LA(1) == '*'){ + isJavaDoc = true; + } + } + (options {greedy=false;} : . 
)* + '*/' + { + if(isJavaDoc==true){ + $channel=HIDDEN; + }else{ + skip(); + } + } + ; + +LINE_COMMENT + : '//' ~('\n'|'\r')* ('\r\n' | '\r' | '\n') + { + skip(); + } + | '//' ~('\n'|'\r')* // a line comment could appear at the end of the file without CR/LF + { + skip(); + } + ; + +ABSTRACT + : 'abstract' + ; + +ASSERT + : 'assert' + ; + +BOOLEAN + : 'boolean' + ; + +BREAK + : 'break' + ; + +BYTE + : 'byte' + ; + +CASE + : 'case' + ; + +CATCH + : 'catch' + ; + +CHAR + : 'char' + ; + +CLASS + : 'class' + ; + +CONST + : 'const' + ; + +CONTINUE + : 'continue' + ; + +DEFAULT + : 'default' + ; + +DO + : 'do' + ; + +DOUBLE + : 'double' + ; + +ELSE + : 'else' + ; + +ENUM + : 'enum' + ; + +EXTENDS + : 'extends' + ; + +FINAL + : 'final' + ; + +FINALLY + : 'finally' + ; + +FLOAT + : 'float' + ; + +FOR + : 'for' + ; + +GOTO + : 'goto' + ; + +IF + : 'if' + ; + +IMPLEMENTS + : 'implements' + ; + +IMPORT + : 'import' + ; + +INSTANCEOF + : 'instanceof' + ; + +INT + : 'int' + ; + +INTERFACE + : 'interface' + ; + +LONG + : 'long' + ; + +NATIVE + : 'native' + ; + +NEW + : 'new' + ; + +PACKAGE + : 'package' + ; + +PRIVATE + : 'private' + ; + +PROTECTED + : 'protected' + ; + +PUBLIC + : 'public' + ; + +RETURN + : 'return' + ; + +SHORT + : 'short' + ; + +STATIC + : 'static' + ; + +STRICTFP + : 'strictfp' + ; + +SUPER + : 'super' + ; + +SWITCH + : 'switch' + ; + +SYNCHRONIZED + : 'synchronized' + ; + +THIS + : 'this' + ; + +THROW + : 'throw' + ; + +THROWS + : 'throws' + ; + +TRANSIENT + : 'transient' + ; + +TRY + : 'try' + ; + +VOID + : 'void' + ; + +VOLATILE + : 'volatile' + ; + +WHILE + : 'while' + ; + +TRUE + : 'true' + ; + +FALSE + : 'false' + ; + +NULL + : 'null' + ; + +LPAREN + : '(' + ; + +RPAREN + : ')' + ; + +LBRACE + : '{' + ; + +RBRACE + : '}' + ; + +LBRACKET + : '[' + ; + +RBRACKET + : ']' + ; + +SEMI + : ';' + ; + +COMMA + : ',' + ; + +DOT + : '.' + ; + +ELLIPSIS + : '...' + ; + +EQ + : '=' + ; + +BANG + : '!' + ; + +TILDE + : '~' + ; + +QUES + : '?' 
+ ; + +COLON + : ':' + ; + +EQEQ + : '==' + ; + +AMPAMP + : '&&' + ; + +BARBAR + : '||' + ; + +PLUSPLUS + : '++' + ; + +SUBSUB + : '--' + ; + +PLUS + : '+' + ; + +SUB + : '-' + ; + +STAR + : '*' + ; + +SLASH + : '/' + ; + +AMP + : '&' + ; + +BAR + : '|' + ; + +CARET + : '^' + ; + +PERCENT + : '%' + ; + +PLUSEQ + : '+=' + ; + +SUBEQ + : '-=' + ; + +STAREQ + : '*=' + ; + +SLASHEQ + : '/=' + ; + +AMPEQ + : '&=' + ; + +BAREQ + : '|=' + ; + +CARETEQ + : '^=' + ; + +PERCENTEQ + : '%=' + ; + +MONKEYS_AT + : '@' + ; + +BANGEQ + : '!=' + ; + +GT + : '>' + ; + +LT + : '<' + ; + +IDENTIFIER + : IdentifierStart IdentifierPart* + ; + +fragment +SurrogateIdentifer + : ('\ud800'..'\udbff') ('\udc00'..'\udfff') + ; + +fragment +IdentifierStart + : '\u0024' + | '\u0041'..'\u005a' + | '\u005f' + | '\u0061'..'\u007a' + | '\u00a2'..'\u00a5' + | '\u00aa' + | '\u00b5' + | '\u00ba' + | '\u00c0'..'\u00d6' + | '\u00d8'..'\u00f6' + | '\u00f8'..'\u0236' + | '\u0250'..'\u02c1' + | '\u02c6'..'\u02d1' + | '\u02e0'..'\u02e4' + | '\u02ee' + | '\u037a' + | '\u0386' + | '\u0388'..'\u038a' + | '\u038c' + | '\u038e'..'\u03a1' + | '\u03a3'..'\u03ce' + | '\u03d0'..'\u03f5' + | '\u03f7'..'\u03fb' + | '\u0400'..'\u0481' + | '\u048a'..'\u04ce' + | '\u04d0'..'\u04f5' + | '\u04f8'..'\u04f9' + | '\u0500'..'\u050f' + | '\u0531'..'\u0556' + | '\u0559' + | '\u0561'..'\u0587' + | '\u05d0'..'\u05ea' + | '\u05f0'..'\u05f2' + | '\u0621'..'\u063a' + | '\u0640'..'\u064a' + | '\u066e'..'\u066f' + | '\u0671'..'\u06d3' + | '\u06d5' + | '\u06e5'..'\u06e6' + | '\u06ee'..'\u06ef' + | '\u06fa'..'\u06fc' + | '\u06ff' + | '\u0710' + | '\u0712'..'\u072f' + | '\u074d'..'\u074f' + | '\u0780'..'\u07a5' + | '\u07b1' + | '\u0904'..'\u0939' + | '\u093d' + | '\u0950' + | '\u0958'..'\u0961' + | '\u0985'..'\u098c' + | '\u098f'..'\u0990' + | '\u0993'..'\u09a8' + | '\u09aa'..'\u09b0' + | '\u09b2' + | '\u09b6'..'\u09b9' + | '\u09bd' + | '\u09dc'..'\u09dd' + | '\u09df'..'\u09e1' + | '\u09f0'..'\u09f3' + | '\u0a05'..'\u0a0a' + | 
'\u0a0f'..'\u0a10' + | '\u0a13'..'\u0a28' + | '\u0a2a'..'\u0a30' + | '\u0a32'..'\u0a33' + | '\u0a35'..'\u0a36' + | '\u0a38'..'\u0a39' + | '\u0a59'..'\u0a5c' + | '\u0a5e' + | '\u0a72'..'\u0a74' + | '\u0a85'..'\u0a8d' + | '\u0a8f'..'\u0a91' + | '\u0a93'..'\u0aa8' + | '\u0aaa'..'\u0ab0' + | '\u0ab2'..'\u0ab3' + | '\u0ab5'..'\u0ab9' + | '\u0abd' + | '\u0ad0' + | '\u0ae0'..'\u0ae1' + | '\u0af1' + | '\u0b05'..'\u0b0c' + | '\u0b0f'..'\u0b10' + | '\u0b13'..'\u0b28' + | '\u0b2a'..'\u0b30' + | '\u0b32'..'\u0b33' + | '\u0b35'..'\u0b39' + | '\u0b3d' + | '\u0b5c'..'\u0b5d' + | '\u0b5f'..'\u0b61' + | '\u0b71' + | '\u0b83' + | '\u0b85'..'\u0b8a' + | '\u0b8e'..'\u0b90' + | '\u0b92'..'\u0b95' + | '\u0b99'..'\u0b9a' + | '\u0b9c' + | '\u0b9e'..'\u0b9f' + | '\u0ba3'..'\u0ba4' + | '\u0ba8'..'\u0baa' + | '\u0bae'..'\u0bb5' + | '\u0bb7'..'\u0bb9' + | '\u0bf9' + | '\u0c05'..'\u0c0c' + | '\u0c0e'..'\u0c10' + | '\u0c12'..'\u0c28' + | '\u0c2a'..'\u0c33' + | '\u0c35'..'\u0c39' + | '\u0c60'..'\u0c61' + | '\u0c85'..'\u0c8c' + | '\u0c8e'..'\u0c90' + | '\u0c92'..'\u0ca8' + | '\u0caa'..'\u0cb3' + | '\u0cb5'..'\u0cb9' + | '\u0cbd' + | '\u0cde' + | '\u0ce0'..'\u0ce1' + | '\u0d05'..'\u0d0c' + | '\u0d0e'..'\u0d10' + | '\u0d12'..'\u0d28' + | '\u0d2a'..'\u0d39' + | '\u0d60'..'\u0d61' + | '\u0d85'..'\u0d96' + | '\u0d9a'..'\u0db1' + | '\u0db3'..'\u0dbb' + | '\u0dbd' + | '\u0dc0'..'\u0dc6' + | '\u0e01'..'\u0e30' + | '\u0e32'..'\u0e33' + | '\u0e3f'..'\u0e46' + | '\u0e81'..'\u0e82' + | '\u0e84' + | '\u0e87'..'\u0e88' + | '\u0e8a' + | '\u0e8d' + | '\u0e94'..'\u0e97' + | '\u0e99'..'\u0e9f' + | '\u0ea1'..'\u0ea3' + | '\u0ea5' + | '\u0ea7' + | '\u0eaa'..'\u0eab' + | '\u0ead'..'\u0eb0' + | '\u0eb2'..'\u0eb3' + | '\u0ebd' + | '\u0ec0'..'\u0ec4' + | '\u0ec6' + | '\u0edc'..'\u0edd' + | '\u0f00' + | '\u0f40'..'\u0f47' + | '\u0f49'..'\u0f6a' + | '\u0f88'..'\u0f8b' + | '\u1000'..'\u1021' + | '\u1023'..'\u1027' + | '\u1029'..'\u102a' + | '\u1050'..'\u1055' + | '\u10a0'..'\u10c5' + | '\u10d0'..'\u10f8' + | 
'\u1100'..'\u1159' + | '\u115f'..'\u11a2' + | '\u11a8'..'\u11f9' + | '\u1200'..'\u1206' + | '\u1208'..'\u1246' + | '\u1248' + | '\u124a'..'\u124d' + | '\u1250'..'\u1256' + | '\u1258' + | '\u125a'..'\u125d' + | '\u1260'..'\u1286' + | '\u1288' + | '\u128a'..'\u128d' + | '\u1290'..'\u12ae' + | '\u12b0' + | '\u12b2'..'\u12b5' + | '\u12b8'..'\u12be' + | '\u12c0' + | '\u12c2'..'\u12c5' + | '\u12c8'..'\u12ce' + | '\u12d0'..'\u12d6' + | '\u12d8'..'\u12ee' + | '\u12f0'..'\u130e' + | '\u1310' + | '\u1312'..'\u1315' + | '\u1318'..'\u131e' + | '\u1320'..'\u1346' + | '\u1348'..'\u135a' + | '\u13a0'..'\u13f4' + | '\u1401'..'\u166c' + | '\u166f'..'\u1676' + | '\u1681'..'\u169a' + | '\u16a0'..'\u16ea' + | '\u16ee'..'\u16f0' + | '\u1700'..'\u170c' + | '\u170e'..'\u1711' + | '\u1720'..'\u1731' + | '\u1740'..'\u1751' + | '\u1760'..'\u176c' + | '\u176e'..'\u1770' + | '\u1780'..'\u17b3' + | '\u17d7' + | '\u17db'..'\u17dc' + | '\u1820'..'\u1877' + | '\u1880'..'\u18a8' + | '\u1900'..'\u191c' + | '\u1950'..'\u196d' + | '\u1970'..'\u1974' + | '\u1d00'..'\u1d6b' + | '\u1e00'..'\u1e9b' + | '\u1ea0'..'\u1ef9' + | '\u1f00'..'\u1f15' + | '\u1f18'..'\u1f1d' + | '\u1f20'..'\u1f45' + | '\u1f48'..'\u1f4d' + | '\u1f50'..'\u1f57' + | '\u1f59' + | '\u1f5b' + | '\u1f5d' + | '\u1f5f'..'\u1f7d' + | '\u1f80'..'\u1fb4' + | '\u1fb6'..'\u1fbc' + | '\u1fbe' + | '\u1fc2'..'\u1fc4' + | '\u1fc6'..'\u1fcc' + | '\u1fd0'..'\u1fd3' + | '\u1fd6'..'\u1fdb' + | '\u1fe0'..'\u1fec' + | '\u1ff2'..'\u1ff4' + | '\u1ff6'..'\u1ffc' + | '\u203f'..'\u2040' + | '\u2054' + | '\u2071' + | '\u207f' + | '\u20a0'..'\u20b1' + | '\u2102' + | '\u2107' + | '\u210a'..'\u2113' + | '\u2115' + | '\u2119'..'\u211d' + | '\u2124' + | '\u2126' + | '\u2128' + | '\u212a'..'\u212d' + | '\u212f'..'\u2131' + | '\u2133'..'\u2139' + | '\u213d'..'\u213f' + | '\u2145'..'\u2149' + | '\u2160'..'\u2183' + | '\u3005'..'\u3007' + | '\u3021'..'\u3029' + | '\u3031'..'\u3035' + | '\u3038'..'\u303c' + | '\u3041'..'\u3096' + | '\u309d'..'\u309f' + | 
'\u30a1'..'\u30ff' + | '\u3105'..'\u312c' + | '\u3131'..'\u318e' + | '\u31a0'..'\u31b7' + | '\u31f0'..'\u31ff' + | '\u3400'..'\u4db5' + | '\u4e00'..'\u9fa5' + | '\ua000'..'\ua48c' + | '\uac00'..'\ud7a3' + | '\uf900'..'\ufa2d' + | '\ufa30'..'\ufa6a' + | '\ufb00'..'\ufb06' + | '\ufb13'..'\ufb17' + | '\ufb1d' + | '\ufb1f'..'\ufb28' + | '\ufb2a'..'\ufb36' + | '\ufb38'..'\ufb3c' + | '\ufb3e' + | '\ufb40'..'\ufb41' + | '\ufb43'..'\ufb44' + | '\ufb46'..'\ufbb1' + | '\ufbd3'..'\ufd3d' + | '\ufd50'..'\ufd8f' + | '\ufd92'..'\ufdc7' + | '\ufdf0'..'\ufdfc' + | '\ufe33'..'\ufe34' + | '\ufe4d'..'\ufe4f' + | '\ufe69' + | '\ufe70'..'\ufe74' + | '\ufe76'..'\ufefc' + | '\uff04' + | '\uff21'..'\uff3a' + | '\uff3f' + | '\uff41'..'\uff5a' + | '\uff65'..'\uffbe' + | '\uffc2'..'\uffc7' + | '\uffca'..'\uffcf' + | '\uffd2'..'\uffd7' + | '\uffda'..'\uffdc' + | '\uffe0'..'\uffe1' + | '\uffe5'..'\uffe6' + | ('\ud800'..'\udbff') ('\udc00'..'\udfff') + ; + +fragment +IdentifierPart + : '\u0000'..'\u0008' + | '\u000e'..'\u001b' + | '\u0024' + | '\u0030'..'\u0039' + | '\u0041'..'\u005a' + | '\u005f' + | '\u0061'..'\u007a' + | '\u007f'..'\u009f' + | '\u00a2'..'\u00a5' + | '\u00aa' + | '\u00ad' + | '\u00b5' + | '\u00ba' + | '\u00c0'..'\u00d6' + | '\u00d8'..'\u00f6' + | '\u00f8'..'\u0236' + | '\u0250'..'\u02c1' + | '\u02c6'..'\u02d1' + | '\u02e0'..'\u02e4' + | '\u02ee' + | '\u0300'..'\u0357' + | '\u035d'..'\u036f' + | '\u037a' + | '\u0386' + | '\u0388'..'\u038a' + | '\u038c' + | '\u038e'..'\u03a1' + | '\u03a3'..'\u03ce' + | '\u03d0'..'\u03f5' + | '\u03f7'..'\u03fb' + | '\u0400'..'\u0481' + | '\u0483'..'\u0486' + | '\u048a'..'\u04ce' + | '\u04d0'..'\u04f5' + | '\u04f8'..'\u04f9' + | '\u0500'..'\u050f' + | '\u0531'..'\u0556' + | '\u0559' + | '\u0561'..'\u0587' + | '\u0591'..'\u05a1' + | '\u05a3'..'\u05b9' + | '\u05bb'..'\u05bd' + | '\u05bf' + | '\u05c1'..'\u05c2' + | '\u05c4' + | '\u05d0'..'\u05ea' + | '\u05f0'..'\u05f2' + | '\u0600'..'\u0603' + | '\u0610'..'\u0615' + | '\u0621'..'\u063a' + | 
'\u0640'..'\u0658' + | '\u0660'..'\u0669' + | '\u066e'..'\u06d3' + | '\u06d5'..'\u06dd' + | '\u06df'..'\u06e8' + | '\u06ea'..'\u06fc' + | '\u06ff' + | '\u070f'..'\u074a' + | '\u074d'..'\u074f' + | '\u0780'..'\u07b1' + | '\u0901'..'\u0939' + | '\u093c'..'\u094d' + | '\u0950'..'\u0954' + | '\u0958'..'\u0963' + | '\u0966'..'\u096f' + | '\u0981'..'\u0983' + | '\u0985'..'\u098c' + | '\u098f'..'\u0990' + | '\u0993'..'\u09a8' + | '\u09aa'..'\u09b0' + | '\u09b2' + | '\u09b6'..'\u09b9' + | '\u09bc'..'\u09c4' + | '\u09c7'..'\u09c8' + | '\u09cb'..'\u09cd' + | '\u09d7' + | '\u09dc'..'\u09dd' + | '\u09df'..'\u09e3' + | '\u09e6'..'\u09f3' + | '\u0a01'..'\u0a03' + | '\u0a05'..'\u0a0a' + | '\u0a0f'..'\u0a10' + | '\u0a13'..'\u0a28' + | '\u0a2a'..'\u0a30' + | '\u0a32'..'\u0a33' + | '\u0a35'..'\u0a36' + | '\u0a38'..'\u0a39' + | '\u0a3c' + | '\u0a3e'..'\u0a42' + | '\u0a47'..'\u0a48' + | '\u0a4b'..'\u0a4d' + | '\u0a59'..'\u0a5c' + | '\u0a5e' + | '\u0a66'..'\u0a74' + | '\u0a81'..'\u0a83' + | '\u0a85'..'\u0a8d' + | '\u0a8f'..'\u0a91' + | '\u0a93'..'\u0aa8' + | '\u0aaa'..'\u0ab0' + | '\u0ab2'..'\u0ab3' + | '\u0ab5'..'\u0ab9' + | '\u0abc'..'\u0ac5' + | '\u0ac7'..'\u0ac9' + | '\u0acb'..'\u0acd' + | '\u0ad0' + | '\u0ae0'..'\u0ae3' + | '\u0ae6'..'\u0aef' + | '\u0af1' + | '\u0b01'..'\u0b03' + | '\u0b05'..'\u0b0c' + | '\u0b0f'..'\u0b10' + | '\u0b13'..'\u0b28' + | '\u0b2a'..'\u0b30' + | '\u0b32'..'\u0b33' + | '\u0b35'..'\u0b39' + | '\u0b3c'..'\u0b43' + | '\u0b47'..'\u0b48' + | '\u0b4b'..'\u0b4d' + | '\u0b56'..'\u0b57' + | '\u0b5c'..'\u0b5d' + | '\u0b5f'..'\u0b61' + | '\u0b66'..'\u0b6f' + | '\u0b71' + | '\u0b82'..'\u0b83' + | '\u0b85'..'\u0b8a' + | '\u0b8e'..'\u0b90' + | '\u0b92'..'\u0b95' + | '\u0b99'..'\u0b9a' + | '\u0b9c' + | '\u0b9e'..'\u0b9f' + | '\u0ba3'..'\u0ba4' + | '\u0ba8'..'\u0baa' + | '\u0bae'..'\u0bb5' + | '\u0bb7'..'\u0bb9' + | '\u0bbe'..'\u0bc2' + | '\u0bc6'..'\u0bc8' + | '\u0bca'..'\u0bcd' + | '\u0bd7' + | '\u0be7'..'\u0bef' + | '\u0bf9' + | '\u0c01'..'\u0c03' + | 
'\u0c05'..'\u0c0c' + | '\u0c0e'..'\u0c10' + | '\u0c12'..'\u0c28' + | '\u0c2a'..'\u0c33' + | '\u0c35'..'\u0c39' + | '\u0c3e'..'\u0c44' + | '\u0c46'..'\u0c48' + | '\u0c4a'..'\u0c4d' + | '\u0c55'..'\u0c56' + | '\u0c60'..'\u0c61' + | '\u0c66'..'\u0c6f' + | '\u0c82'..'\u0c83' + | '\u0c85'..'\u0c8c' + | '\u0c8e'..'\u0c90' + | '\u0c92'..'\u0ca8' + | '\u0caa'..'\u0cb3' + | '\u0cb5'..'\u0cb9' + | '\u0cbc'..'\u0cc4' + | '\u0cc6'..'\u0cc8' + | '\u0cca'..'\u0ccd' + | '\u0cd5'..'\u0cd6' + | '\u0cde' + | '\u0ce0'..'\u0ce1' + | '\u0ce6'..'\u0cef' + | '\u0d02'..'\u0d03' + | '\u0d05'..'\u0d0c' + | '\u0d0e'..'\u0d10' + | '\u0d12'..'\u0d28' + | '\u0d2a'..'\u0d39' + | '\u0d3e'..'\u0d43' + | '\u0d46'..'\u0d48' + | '\u0d4a'..'\u0d4d' + | '\u0d57' + | '\u0d60'..'\u0d61' + | '\u0d66'..'\u0d6f' + | '\u0d82'..'\u0d83' + | '\u0d85'..'\u0d96' + | '\u0d9a'..'\u0db1' + | '\u0db3'..'\u0dbb' + | '\u0dbd' + | '\u0dc0'..'\u0dc6' + | '\u0dca' + | '\u0dcf'..'\u0dd4' + | '\u0dd6' + | '\u0dd8'..'\u0ddf' + | '\u0df2'..'\u0df3' + | '\u0e01'..'\u0e3a' + | '\u0e3f'..'\u0e4e' + | '\u0e50'..'\u0e59' + | '\u0e81'..'\u0e82' + | '\u0e84' + | '\u0e87'..'\u0e88' + | '\u0e8a' + | '\u0e8d' + | '\u0e94'..'\u0e97' + | '\u0e99'..'\u0e9f' + | '\u0ea1'..'\u0ea3' + | '\u0ea5' + | '\u0ea7' + | '\u0eaa'..'\u0eab' + | '\u0ead'..'\u0eb9' + | '\u0ebb'..'\u0ebd' + | '\u0ec0'..'\u0ec4' + | '\u0ec6' + | '\u0ec8'..'\u0ecd' + | '\u0ed0'..'\u0ed9' + | '\u0edc'..'\u0edd' + | '\u0f00' + | '\u0f18'..'\u0f19' + | '\u0f20'..'\u0f29' + | '\u0f35' + | '\u0f37' + | '\u0f39' + | '\u0f3e'..'\u0f47' + | '\u0f49'..'\u0f6a' + | '\u0f71'..'\u0f84' + | '\u0f86'..'\u0f8b' + | '\u0f90'..'\u0f97' + | '\u0f99'..'\u0fbc' + | '\u0fc6' + | '\u1000'..'\u1021' + | '\u1023'..'\u1027' + | '\u1029'..'\u102a' + | '\u102c'..'\u1032' + | '\u1036'..'\u1039' + | '\u1040'..'\u1049' + | '\u1050'..'\u1059' + | '\u10a0'..'\u10c5' + | '\u10d0'..'\u10f8' + | '\u1100'..'\u1159' + | '\u115f'..'\u11a2' + | '\u11a8'..'\u11f9' + | '\u1200'..'\u1206' + | '\u1208'..'\u1246' + 
| '\u1248' + | '\u124a'..'\u124d' + | '\u1250'..'\u1256' + | '\u1258' + | '\u125a'..'\u125d' + | '\u1260'..'\u1286' + | '\u1288' + | '\u128a'..'\u128d' + | '\u1290'..'\u12ae' + | '\u12b0' + | '\u12b2'..'\u12b5' + | '\u12b8'..'\u12be' + | '\u12c0' + | '\u12c2'..'\u12c5' + | '\u12c8'..'\u12ce' + | '\u12d0'..'\u12d6' + | '\u12d8'..'\u12ee' + | '\u12f0'..'\u130e' + | '\u1310' + | '\u1312'..'\u1315' + | '\u1318'..'\u131e' + | '\u1320'..'\u1346' + | '\u1348'..'\u135a' + | '\u1369'..'\u1371' + | '\u13a0'..'\u13f4' + | '\u1401'..'\u166c' + | '\u166f'..'\u1676' + | '\u1681'..'\u169a' + | '\u16a0'..'\u16ea' + | '\u16ee'..'\u16f0' + | '\u1700'..'\u170c' + | '\u170e'..'\u1714' + | '\u1720'..'\u1734' + | '\u1740'..'\u1753' + | '\u1760'..'\u176c' + | '\u176e'..'\u1770' + | '\u1772'..'\u1773' + | '\u1780'..'\u17d3' + | '\u17d7' + | '\u17db'..'\u17dd' + | '\u17e0'..'\u17e9' + | '\u180b'..'\u180d' + | '\u1810'..'\u1819' + | '\u1820'..'\u1877' + | '\u1880'..'\u18a9' + | '\u1900'..'\u191c' + | '\u1920'..'\u192b' + | '\u1930'..'\u193b' + | '\u1946'..'\u196d' + | '\u1970'..'\u1974' + | '\u1d00'..'\u1d6b' + | '\u1e00'..'\u1e9b' + | '\u1ea0'..'\u1ef9' + | '\u1f00'..'\u1f15' + | '\u1f18'..'\u1f1d' + | '\u1f20'..'\u1f45' + | '\u1f48'..'\u1f4d' + | '\u1f50'..'\u1f57' + | '\u1f59' + | '\u1f5b' + | '\u1f5d' + | '\u1f5f'..'\u1f7d' + | '\u1f80'..'\u1fb4' + | '\u1fb6'..'\u1fbc' + | '\u1fbe' + | '\u1fc2'..'\u1fc4' + | '\u1fc6'..'\u1fcc' + | '\u1fd0'..'\u1fd3' + | '\u1fd6'..'\u1fdb' + | '\u1fe0'..'\u1fec' + | '\u1ff2'..'\u1ff4' + | '\u1ff6'..'\u1ffc' + | '\u200c'..'\u200f' + | '\u202a'..'\u202e' + | '\u203f'..'\u2040' + | '\u2054' + | '\u2060'..'\u2063' + | '\u206a'..'\u206f' + | '\u2071' + | '\u207f' + | '\u20a0'..'\u20b1' + | '\u20d0'..'\u20dc' + | '\u20e1' + | '\u20e5'..'\u20ea' + | '\u2102' + | '\u2107' + | '\u210a'..'\u2113' + | '\u2115' + | '\u2119'..'\u211d' + | '\u2124' + | '\u2126' + | '\u2128' + | '\u212a'..'\u212d' + | '\u212f'..'\u2131' + | '\u2133'..'\u2139' + | '\u213d'..'\u213f' + | 
'\u2145'..'\u2149' + | '\u2160'..'\u2183' + | '\u3005'..'\u3007' + | '\u3021'..'\u302f' + | '\u3031'..'\u3035' + | '\u3038'..'\u303c' + | '\u3041'..'\u3096' + | '\u3099'..'\u309a' + | '\u309d'..'\u309f' + | '\u30a1'..'\u30ff' + | '\u3105'..'\u312c' + | '\u3131'..'\u318e' + | '\u31a0'..'\u31b7' + | '\u31f0'..'\u31ff' + | '\u3400'..'\u4db5' + | '\u4e00'..'\u9fa5' + | '\ua000'..'\ua48c' + | '\uac00'..'\ud7a3' + | '\uf900'..'\ufa2d' + | '\ufa30'..'\ufa6a' + | '\ufb00'..'\ufb06' + | '\ufb13'..'\ufb17' + | '\ufb1d'..'\ufb28' + | '\ufb2a'..'\ufb36' + | '\ufb38'..'\ufb3c' + | '\ufb3e' + | '\ufb40'..'\ufb41' + | '\ufb43'..'\ufb44' + | '\ufb46'..'\ufbb1' + | '\ufbd3'..'\ufd3d' + | '\ufd50'..'\ufd8f' + | '\ufd92'..'\ufdc7' + | '\ufdf0'..'\ufdfc' + | '\ufe00'..'\ufe0f' + | '\ufe20'..'\ufe23' + | '\ufe33'..'\ufe34' + | '\ufe4d'..'\ufe4f' + | '\ufe69' + | '\ufe70'..'\ufe74' + | '\ufe76'..'\ufefc' + | '\ufeff' + | '\uff04' + | '\uff10'..'\uff19' + | '\uff21'..'\uff3a' + | '\uff3f' + | '\uff41'..'\uff5a' + | '\uff65'..'\uffbe' + | '\uffc2'..'\uffc7' + | '\uffca'..'\uffcf' + | '\uffd2'..'\uffd7' + | '\uffda'..'\uffdc' + | '\uffe0'..'\uffe1' + | '\uffe5'..'\uffe6' + | '\ufff9'..'\ufffb' + | ('\ud800'..'\udbff') ('\udc00'..'\udfff') + ; + diff --git a/topics/grammars/java/antlr-java-6/README.txt b/topics/grammars/java/antlr-java-6/README.txt new file mode 100644 index 00000000..0ff474d8 --- /dev/null +++ b/topics/grammars/java/antlr-java-6/README.txt @@ -0,0 +1,8 @@ +http://www.antlr.org/grammar/list: + ↳ http://openjdk.java.net/projects/compiler-grammar/antlrworks/Java.g + +Java 1.6 grammar +Yang Jiang Fri Jan 16, 2009 12:01 +ANTLR-based Java grammar from openjdk project to develop an experimental version of the javac compiler based upon a grammar written in ANTLR. Derived from Terence Parr's Java 1.5 grammar. NOTE:: Use ANTLR -Xconversiontimeout 100000 option. 
If it still doesn't work or the compilation process takes too long, try to comment out the following two lines: +| {isValidSurrogateIdentifierStart((char)input.LT(1), (char)input.LT(2))}?=>('\ud800'..'\udbff') ('\udc00'..'\udfff') +| {isValidSurrogateIdentifierPart((char)input.LT(1), (char)input.LT(2))}?=>('\ud800'..'\udbff') ('\udc00'..'\udfff')