diff --git a/build.gradle b/build.gradle index cea93a2ab..639c4bca1 100644 --- a/build.gradle +++ b/build.gradle @@ -53,7 +53,7 @@ configure(subprojects.findAll {it.name != 'core'}) { } } -configure(subprojects.findAll { it.name.startsWith('gen.antlr') }) { +configure(subprojects.findAll { it.name == 'gen.antlr' || it.name.startsWith('gen.antlr-') }) { apply plugin: 'antlr' dependencies { @@ -67,6 +67,24 @@ configure(subprojects.findAll { it.name.startsWith('gen.antlr') }) { } } +configure(subprojects.findAll { it.name == 'gen.antlr4' || it.name.startsWith('gen.antlr4-') }) { + apply plugin: 'antlr' + + dependencies { + antlr "org.antlr:antlr4:4.5.3" + } + + generateGrammarSource { + outputDirectory = new File("${project.buildDir}/generated-src/antlr/main/com/github/gumtreediff/gen/php71".toString()) + } + + if (it.name.startsWith('gen.antlr4-')) { + dependencies { + compile project(':gen.antlr4') + } + } +} + def jacocoProjectsNames = ['core', 'gen.jdt', 'gen.c', 'gen.ruby', 'gen.js', 'gen.srcml', 'gen.css'] def jacocoProjects = subprojects.findAll { it.name in jacocoProjectsNames} configure(jacocoProjects) { diff --git a/dist/build.gradle b/dist/build.gradle index e6dd33fb9..3e88f55b1 100644 --- a/dist/build.gradle +++ b/dist/build.gradle @@ -21,6 +21,8 @@ dependencies { compile project(':gen.antlr-php') compile project(':gen.antlr-r') compile project(':gen.antlr-xml') + compile project(':gen.antlr4') + compile project(':gen.antlr4-php') compile project(':gen.c') compile project(':gen.css') compile project(':gen.jdt') diff --git a/gen.antlr-php/src/main/java/com/github/gumtreediff/gen/php/PhpTreeGenerator.java b/gen.antlr-php/src/main/java/com/github/gumtreediff/gen/php/PhpTreeGenerator.java index bae7621b4..6664b7a50 100644 --- a/gen.antlr-php/src/main/java/com/github/gumtreediff/gen/php/PhpTreeGenerator.java +++ b/gen.antlr-php/src/main/java/com/github/gumtreediff/gen/php/PhpTreeGenerator.java @@ -20,6 +20,7 @@ package com.github.gumtreediff.gen.php; +import 
com.github.gumtreediff.gen.Registry; import com.github.gumtreediff.gen.antlr.AbstractAntlrTreeGenerator; import com.github.gumtreediff.gen.Register; import com.github.gumtreediff.gen.antlr.AbstractAntlrTreeGenerator; @@ -29,7 +30,7 @@ import java.io.IOException; import java.io.Reader; -@Register(id = "php-antlr", accept = "\\.php.?$") +@Register(id = "php-antlr", accept = "\\.php.?$", priority = Registry.Priority.LOW) public class PhpTreeGenerator extends AbstractAntlrTreeGenerator { @Override diff --git a/gen.antlr4-php/build.gradle b/gen.antlr4-php/build.gradle new file mode 100644 index 000000000..9f645dc08 --- /dev/null +++ b/gen.antlr4-php/build.gradle @@ -0,0 +1 @@ +description = 'GumTree tree generator for PHP7.1 code (AntLR4 based).' diff --git a/gen.antlr4-php/src/main/antlr/PHPLexer.g4 b/gen.antlr4-php/src/main/antlr/PHPLexer.g4 new file mode 100644 index 000000000..c1e64e5d5 --- /dev/null +++ b/gen.antlr4-php/src/main/antlr/PHPLexer.g4 @@ -0,0 +1,474 @@ +/* +PHP grammar. +The MIT License (MIT). +Copyright (c) 2015-2016, Ivan Kochurkin (kvanttt@gmail.com), Positive Technologies. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +lexer grammar PHPLexer; + +channels { PhpComments, ErrorLexem } + +@header { + package com.github.gumtreediff.gen.php71; +} + +@lexer::members +{public boolean AspTags = true; +boolean _scriptTag; +boolean _styleTag; +String _heredocIdentifier; +int _prevTokenType; +String _htmlNameText; +boolean _phpScript; +boolean _insideString; + +@Override +public Token nextToken() // Post-processes each raw token: leaves PHP mode on a closing tag, synthesizes a missing ';', and detects heredoc ends. +{ + CommonToken token = (CommonToken)super.nextToken(); + + if (token.getType() == PHPEnd || token.getType() == PHPEndSingleLineComment) + { + if (_mode == SingleLineCommentMode) + { + // SingleLineCommentMode for such allowed syntax: + // <?php echo "Hello world"; // comment ?> + popMode(); // exit from SingleLineComment mode. + } + popMode(); // exit from PHP mode. + + if (token.getText().equals("</script>")) + { + _phpScript = false; + token.setType(ScriptClose); + } + else + { + // Add semicolon to the end of statement if it is absent. 
+ // For example: <?php echo "Hello world" ?> + if (_prevTokenType == SemiColon || _prevTokenType == Colon + || _prevTokenType == OpenCurlyBracket || _prevTokenType == CloseCurlyBracket) + { + token = (CommonToken)super.nextToken(); + } + else + { + token = new CommonToken(SemiColon); + } + } + } + else if (token.getType() == HtmlName) + { + _htmlNameText = token.getText(); + } + else if (token.getType() == HtmlDoubleQuoteString) + { + if (token.getText().equals("php") && "language".equals(_htmlNameText)) // constant-first equals: _htmlNameText is still null if no HtmlName was seen yet + { + _phpScript = true; + } + } + else if (_mode == HereDoc) + { + // Heredoc and Nowdoc syntax support: http://php.net/manual/en/language.types.string.php#language.types.string.syntax.heredoc + switch (token.getType()) + { + case StartHereDoc: + case StartNowDoc: + _heredocIdentifier = token.getText().substring(3).trim().replace("\'",""); + break; + + case HereDocText: + if (CheckHeredocEnd(token.getText())) + { + popMode(); + if (token.getText().trim().endsWith(";")) + { + token = new CommonToken(SemiColon); + } + else + { + token = (CommonToken)super.nextToken(); + } + } + break; + } + } + else if (_mode == PHP) + { + if (_channel != HIDDEN) + { + _prevTokenType = token.getType(); + } + } + + return token; +} + +boolean CheckHeredocEnd(String text) // true when text, minus an optional trailing ';', equals the current heredoc identifier +{ + text = text.trim(); + boolean semi = (text.length() > 0) ? (text.charAt(text.length() - 1) == ';') : false; + String identifier = semi ? text.substring(0, text.length() - 1) : text; + boolean result = identifier.equals(_heredocIdentifier); + return result; +}} + +SeaWhitespace: [ \t\r\n]+ -> channel(HIDDEN); +HtmlText: ~[<#]+; +PHPStartEcho: PhpStartEchoFragment -> type(Echo), pushMode(PHP); +PHPStart: PhpStartFragment -> skip, pushMode(PHP); +HtmlScriptOpen: '<' 'script' { _scriptTag = true; } -> pushMode(INSIDE); +HtmlStyleOpen: '<' 'style' { _styleTag = true; } -> pushMode(INSIDE); +HtmlComment: '<' '!' '--' .*? '-->' -> channel(HIDDEN); +HtmlDtd: '<' '!' .*? 
'>'; +HtmlOpen: '<' -> pushMode(INSIDE); +Shebang + : { _input.LA(-1) <= 0 || _input.LA(-1) == '\r' || _input.LA(-1) == '\n' }? '#' '!' ~[\r\n]* + ; +NumberSign: '#' ~[<]* -> more; +Error: . -> channel(ErrorLexem); + +mode INSIDE; + +PHPStartEchoInside: PhpStartEchoFragment -> type(Echo), pushMode(PHP); +PHPStartInside: PhpStartFragment -> skip, pushMode(PHP); +HtmlClose: '>' { +popMode(); +if (_scriptTag) +{ + if (!_phpScript) + { + pushMode(SCRIPT); + } + else + { + pushMode(PHP); + } + _scriptTag = false; +} +else if (_styleTag) +{ + pushMode(STYLE); + _styleTag = false; +} +}; +HtmlSlashClose: '/>' -> popMode; +HtmlSlash: '/'; +HtmlEquals: '='; + +HtmlStartQuoteString: '\\'? '\'' -> pushMode(HtmlQuoteStringMode); +HtmlStartDoubleQuoteString: '\\'? '"' -> pushMode(HtmlDoubleQuoteStringMode); +HtmlHex: '#' HexDigit+ ; +HtmlDecimal: Digit+; +HtmlSpace: [ \t\r\n]+ -> channel(HIDDEN); +HtmlName: NameStartChar NameChar*; +ErrorInside: . -> channel(ErrorLexem); + +mode HtmlQuoteStringMode; + +PHPStartEchoInsideQuoteString: PhpStartEchoFragment -> type(Echo), pushMode(PHP); +PHPStartInsideQuoteString: PhpStartFragment -> skip, pushMode(PHP); +HtmlEndQuoteString: '\'' '\''? -> popMode; +HtmlQuoteString: ~[<']+; +ErrorHtmlQuote: . -> channel(ErrorLexem); + +mode HtmlDoubleQuoteStringMode; + +PHPStartEchoDoubleQuoteString: PhpStartEchoFragment -> type(Echo), pushMode(PHP); +PHPStartDoubleQuoteString: PhpStartFragment -> skip, pushMode(PHP); +HtmlEndDoubleQuoteString: '"' '"'? -> popMode; +HtmlDoubleQuoteString: ~[<"]+; +ErrorHtmlDoubleQuote: . -> channel(ErrorLexem); + +// Parse JavaScript with https://github.com/antlr/grammars-v4/tree/master/ecmascript if necessary. +// Php blocks can exist inside Script blocks too. +mode SCRIPT; + +ScriptText: ~[<]+; +ScriptClose: '<' '/' 'script'? 
'>' -> popMode; +PHPStartInsideScriptEcho: PhpStartEchoFragment -> type(Echo), pushMode(PHP); +PHPStartInsideScript: PhpStartFragment-> skip, pushMode(PHP); +ScriptText2: '<' ~[<?/]* -> type(ScriptText); // a '<' that starts neither a PHP tag nor a closing tag stays script text +ScriptText3: '?' ~[<]* -> type(ScriptText); +ScriptText4: '/' ~[<]* -> type(ScriptText); + +mode STYLE; + +StyleBody: .*? '</' 'style'? '>' -> popMode; // swallow the style sheet up to and including its closing tag + +mode PHP; + +PHPEnd: (('?' | {AspTags}? '%') '>') | {_phpScript}? '</script>'; // the closing script tag only ends PHP when PHP was entered through a script tag +Whitespace: [ \t\r\n]+ -> skip; +MultiLineComment: '/*' .*? '*/' -> channel(PhpComments); +SingleLineComment: '//' -> skip, pushMode(SingleLineCommentMode); +ShellStyleComment: '#' -> skip, pushMode(SingleLineCommentMode); + +Abstract: 'abstract'; +Array: 'array'; +As: 'as'; +BinaryCast: 'binary'; +BoolType: 'boolean' | 'bool'; +BooleanConstant: 'true' | 'false'; +Break: 'break'; +Callable: 'callable'; +Case: 'case'; +Catch: 'catch'; +Class: 'class'; +Clone: 'clone'; +Const: 'const'; +Continue: 'continue'; +Declare: 'declare'; +Default: 'default'; +Do: 'do'; +DoubleCast: 'real'; +DoubleType: 'double'; +Echo: 'echo'; +Else: 'else'; +ElseIf: 'elseif'; +Empty: 'empty'; + +EndDeclare: 'enddeclare'; +EndFor: 'endfor'; +EndForeach: 'endforeach'; +EndIf: 'endif'; +EndSwitch: 'endswitch'; +EndWhile: 'endwhile'; + +Eval: 'eval'; +Exit: 'die'; +Extends: 'extends'; +Final: 'final'; +Finally: 'finally'; +FloatCast: 'float'; +For: 'for'; +Foreach: 'foreach'; +Function: 'function'; +Global: 'global'; +Goto: 'goto'; +If: 'if'; +Implements: 'implements'; +Import: 'import'; +Include: 'include'; +IncludeOnce: 'include_once'; +InstanceOf: 'instanceof'; +InsteadOf: 'insteadof'; +Int8Cast: 'int8'; +Int16Cast: 'int16'; +Int64Type: 'int64'; +IntType: 'int' 'eger'?; +Interface: 'interface'; +IsSet: 'isset'; +List: 'list'; +LogicalAnd: 'and'; +LogicalOr: 'or'; +LogicalXor: 'xor'; +Namespace: 'namespace'; +New: 'new'; +Null: 'null'; +ObjectType: 'object'; +Parent_: 'parent'; +Partial: 'partial'; +Print: 'print'; +Private: 'private'; +Protected: 'protected'; +Public: 'public'; +Require: 
'require'; +RequireOnce: 'require_once'; +Resource: 'resource'; +Return: 'return'; +Static: 'static'; +StringType: 'string'; +Switch: 'switch'; +Throw: 'throw'; +Trait: 'trait'; +Try: 'try'; +Typeof: 'clrtypeof'; +UintCast: 'uint' ('8' | '16' | '64')?; +UnicodeCast: 'unicode'; +Unset: 'unset'; +Use: 'use'; +Var: 'var'; +While: 'while'; +Yield: 'yield'; + +Get: '__get'; +Set: '__set'; +Call: '__call'; +CallStatic: '__callstatic'; +Constructor: '__construct'; +Destruct: '__destruct'; +Wakeup: '__wakeup'; +Sleep: '__sleep'; +Autoload: '__autoload'; +IsSet__: '__isset'; +Unset__: '__unset'; +ToString__: '__tostring'; +Invoke: '__invoke'; +SetState: '__set_state'; +Clone__: '__clone'; +DebugInfo: '__debuginfo'; +Namespace__: '__namespace__'; +Class__: '__class__'; +Traic__: '__trait__'; +Function__: '__function__'; +Method__: '__method__'; +Line__: '__line__'; +File__: '__file__'; +Dir__: '__dir__'; + +Lgeneric: '<:'; +Rgeneric: ':>'; +DoubleArrow: '=>'; +Inc: '++'; +Dec: '--'; +IsIdentical: '==='; +IsNoidentical: '!=='; +IsEqual: '=='; +IsNotEq: '<>' | '!='; +IsSmallerOrEqual: '<='; +IsGreaterOrEqual: '>='; +PlusEqual: '+='; +MinusEqual: '-='; +MulEqual: '*='; +Pow: '**'; +PowEqual: '**='; +DivEqual: '/='; +Concaequal: '.='; +ModEqual: '%='; +ShiftLeftEqual: '<<='; +ShiftRightEqual: '>>='; +AndEqual: '&='; +OrEqual: '|='; +XorEqual: '^='; +BooleanOr: '||'; +BooleanAnd: '&&'; +ShiftLeft: '<<'; +ShiftRight: '>>'; +DoubleColon: '::'; +ObjectOperator: '->'; +NamespaceSeparator: '\\'; +Ellipsis: '...'; +Less: '<'; +Greater: '>'; +Ampersand: '&'; +Pipe: '|'; +Bang: '!'; +Caret: '^'; +Plus: '+'; +Minus: '-'; +Asterisk: '*'; +Percent: '%'; +Divide: '/'; +Tilde: '~'; +SuppressWarnings: '@'; +Dollar: '$'; +Dot: '.'; +QuestionMark: '?'; +OpenRoundBracket: '('; +CloseRoundBracket: ')'; +OpenSquareBracket: '['; +CloseSquareBracket: ']'; +OpenCurlyBracket: '{'; +CloseCurlyBracket: '}' +{ +if (_insideString) +{ + _insideString = false; + skip(); + popMode(); +} +}; +Comma: ','; 
+Colon: ':'; +SemiColon: ';'; +Eq: '='; +Quote: '\''; +BackQuote: '`'; + +VarName: '$' [a-zA-Z_][a-zA-Z_0-9]*; +Label: [a-zA-Z_][a-zA-Z_0-9]*; +Octal: '0' [0-7]+; +Decimal: Digit+; +Real: (Digit+ '.' Digit* | '.' Digit+) ExponentPart? | Digit+ ExponentPart; +Hex: '0x' HexDigit+; +Binary: '0b' [01]+; + +BackQuoteString: '`' ~'`'* '`'; +SingleQuoteString: '\'' (~('\'' | '\\') | '\\' . )* '\''; +DoubleQuote: '"' -> pushMode(InterpolationString); + +StartNowDoc + : '<<<' [ \t]* '\'' [a-zA-Z_][a-zA-Z_0-9]* '\'' { _input.LA(1) == '\r' || _input.LA(1) == '\n' }? -> pushMode(HereDoc) + ; +StartHereDoc + : '<<<' [ \t]* [a-zA-Z_][a-zA-Z_0-9]* { _input.LA(1) == '\r' || _input.LA(1) == '\n' }? -> pushMode(HereDoc) + ; +ErrorPhp: . -> channel(ErrorLexem); + +mode InterpolationString; + +VarNameInInterpolation: '$' [a-zA-Z_][a-zA-Z_0-9]* -> type(VarName); // TODO: fix such cases: "$people->john" +DollarString: '$' -> type(StringPart); +CurlyDollar: '{' {_input.LA(1) == '$'}? {_insideString = true;} -> skip, pushMode(PHP); +CurlyString: '{' -> type(StringPart); +EscapedChar: '\\' . -> type(StringPart); +DoubleQuoteInInterpolation: '"' -> type(DoubleQuote), popMode; +StringPart: ~[${\\"]+; + +mode SingleLineCommentMode; + +Comment: ~[\r\n?]+ -> channel(PhpComments); +PHPEndSingleLineComment: '?' '>'; +CommentQuestionMark: '?' -> type(Comment), channel(PhpComments); +CommentEnd: [\r\n] -> skip, popMode; // exit from comment. + +mode HereDoc; // TODO: interpolation for heredoc strings. + +HereDocText: ~[\r\n]*? ('\r'? '\n' | '\r'); + +// fragments. +// '<?=' (and '<%=' when AspTags is set) is emitted as an Echo token, so '<?= "Hello world"; ?>' lexes like '<?php echo "Hello world"; ?>' +fragment PhpStartEchoFragment: '<' ('?' '=' | {AspTags}? '%' '='); +fragment PhpStartFragment: '<' ('?' 'php'? | {AspTags}? '%'); +fragment NameChar + : NameStartChar + | '-' + | '_' + | '.' 
+ | Digit + | '\u00B7' + | '\u0300'..'\u036F' + | '\u203F'..'\u2040' + ; +fragment NameStartChar + : [:a-zA-Z] + | '\u2070'..'\u218F' + | '\u2C00'..'\u2FEF' + | '\u3001'..'\uD7FF' + | '\uF900'..'\uFDCF' + | '\uFDF0'..'\uFFFD' + ; +fragment ExponentPart: 'e' [+-]? Digit+; +fragment Digit: [0-9]; +fragment HexDigit: [a-fA-F0-9]; diff --git a/gen.antlr4-php/src/main/antlr/PHPLexer.tokens b/gen.antlr4-php/src/main/antlr/PHPLexer.tokens new file mode 100644 index 000000000..537221b53 --- /dev/null +++ b/gen.antlr4-php/src/main/antlr/PHPLexer.tokens @@ -0,0 +1,388 @@ +SeaWhitespace=1 +HtmlText=2 +PHPStart=3 +HtmlScriptOpen=4 +HtmlStyleOpen=5 +HtmlComment=6 +HtmlDtd=7 +HtmlOpen=8 +Shebang=9 +Error=10 +PHPStartInside=11 +HtmlClose=12 +HtmlSlashClose=13 +HtmlSlash=14 +HtmlEquals=15 +HtmlStartQuoteString=16 +HtmlStartDoubleQuoteString=17 +HtmlHex=18 +HtmlDecimal=19 +HtmlSpace=20 +HtmlName=21 +ErrorInside=22 +PHPStartInsideQuoteString=23 +HtmlEndQuoteString=24 +HtmlQuoteString=25 +ErrorHtmlQuote=26 +PHPStartDoubleQuoteString=27 +HtmlEndDoubleQuoteString=28 +HtmlDoubleQuoteString=29 +ErrorHtmlDoubleQuote=30 +ScriptText=31 +ScriptClose=32 +PHPStartInsideScript=33 +StyleBody=34 +PHPEnd=35 +Whitespace=36 +MultiLineComment=37 +SingleLineComment=38 +ShellStyleComment=39 +Abstract=40 +Array=41 +As=42 +BinaryCast=43 +BoolType=44 +BooleanConstant=45 +Break=46 +Callable=47 +Case=48 +Catch=49 +Class=50 +Clone=51 +Const=52 +Continue=53 +Declare=54 +Default=55 +Do=56 +DoubleCast=57 +DoubleType=58 +Echo=59 +Else=60 +ElseIf=61 +Empty=62 +EndDeclare=63 +EndFor=64 +EndForeach=65 +EndIf=66 +EndSwitch=67 +EndWhile=68 +Eval=69 +Exit=70 +Extends=71 +Final=72 +Finally=73 +FloatCast=74 +For=75 +Foreach=76 +Function=77 +Global=78 +Goto=79 +If=80 +Implements=81 +Import=82 +Include=83 +IncludeOnce=84 +InstanceOf=85 +InsteadOf=86 +Int8Cast=87 +Int16Cast=88 +Int64Type=89 +IntType=90 +Interface=91 +IsSet=92 +List=93 +LogicalAnd=94 +LogicalOr=95 +LogicalXor=96 +Namespace=97 +New=98 +Null=99 
+ObjectType=100 +Parent_=101 +Partial=102 +Print=103 +Private=104 +Protected=105 +Public=106 +Require=107 +RequireOnce=108 +Resource=109 +Return=110 +Static=111 +StringType=112 +Switch=113 +Throw=114 +Trait=115 +Try=116 +Typeof=117 +UintCast=118 +UnicodeCast=119 +Unset=120 +Use=121 +Var=122 +While=123 +Yield=124 +Get=125 +Set=126 +Call=127 +CallStatic=128 +Constructor=129 +Destruct=130 +Wakeup=131 +Sleep=132 +Autoload=133 +IsSet__=134 +Unset__=135 +ToString__=136 +Invoke=137 +SetState=138 +Clone__=139 +DebugInfo=140 +Namespace__=141 +Class__=142 +Traic__=143 +Function__=144 +Method__=145 +Line__=146 +File__=147 +Dir__=148 +Lgeneric=149 +Rgeneric=150 +DoubleArrow=151 +Inc=152 +Dec=153 +IsIdentical=154 +IsNoidentical=155 +IsEqual=156 +IsNotEq=157 +IsSmallerOrEqual=158 +IsGreaterOrEqual=159 +PlusEqual=160 +MinusEqual=161 +MulEqual=162 +Pow=163 +PowEqual=164 +DivEqual=165 +Concaequal=166 +ModEqual=167 +ShiftLeftEqual=168 +ShiftRightEqual=169 +AndEqual=170 +OrEqual=171 +XorEqual=172 +BooleanOr=173 +BooleanAnd=174 +ShiftLeft=175 +ShiftRight=176 +DoubleColon=177 +ObjectOperator=178 +NamespaceSeparator=179 +Ellipsis=180 +Less=181 +Greater=182 +Ampersand=183 +Pipe=184 +Bang=185 +Caret=186 +Plus=187 +Minus=188 +Asterisk=189 +Percent=190 +Divide=191 +Tilde=192 +SuppressWarnings=193 +Dollar=194 +Dot=195 +QuestionMark=196 +OpenRoundBracket=197 +CloseRoundBracket=198 +OpenSquareBracket=199 +CloseSquareBracket=200 +OpenCurlyBracket=201 +CloseCurlyBracket=202 +Comma=203 +Colon=204 +SemiColon=205 +Eq=206 +Quote=207 +BackQuote=208 +VarName=209 +Label=210 +Octal=211 +Decimal=212 +Real=213 +Hex=214 +Binary=215 +BackQuoteString=216 +SingleQuoteString=217 +DoubleQuote=218 +StartNowDoc=219 +StartHereDoc=220 +ErrorPhp=221 +CurlyDollar=222 +StringPart=223 +Comment=224 +PHPEndSingleLineComment=225 +CommentEnd=226 +HereDocText=227 +'/>'=13 +'//'=38 +'#'=39 +'abstract'=40 +'array'=41 +'as'=42 +'binary'=43 +'break'=46 +'callable'=47 +'case'=48 +'catch'=49 +'class'=50 +'clone'=51 +'const'=52 
+'continue'=53 +'declare'=54 +'default'=55 +'do'=56 +'real'=57 +'double'=58 +'echo'=59 +'else'=60 +'elseif'=61 +'empty'=62 +'enddeclare'=63 +'endfor'=64 +'endforeach'=65 +'endif'=66 +'endswitch'=67 +'endwhile'=68 +'eval'=69 +'die'=70 +'extends'=71 +'final'=72 +'finally'=73 +'float'=74 +'for'=75 +'foreach'=76 +'function'=77 +'global'=78 +'goto'=79 +'if'=80 +'implements'=81 +'import'=82 +'include'=83 +'include_once'=84 +'instanceof'=85 +'insteadof'=86 +'int8'=87 +'int16'=88 +'int64'=89 +'interface'=91 +'isset'=92 +'list'=93 +'and'=94 +'or'=95 +'xor'=96 +'namespace'=97 +'new'=98 +'null'=99 +'object'=100 +'parent'=101 +'partial'=102 +'print'=103 +'private'=104 +'protected'=105 +'public'=106 +'require'=107 +'require_once'=108 +'resource'=109 +'return'=110 +'static'=111 +'string'=112 +'switch'=113 +'throw'=114 +'trait'=115 +'try'=116 +'clrtypeof'=117 +'unicode'=119 +'unset'=120 +'use'=121 +'var'=122 +'while'=123 +'yield'=124 +'__get'=125 +'__set'=126 +'__call'=127 +'__callstatic'=128 +'__construct'=129 +'__destruct'=130 +'__wakeup'=131 +'__sleep'=132 +'__autoload'=133 +'__isset'=134 +'__unset'=135 +'__tostring'=136 +'__invoke'=137 +'__set_state'=138 +'__clone'=139 +'__debuginfo'=140 +'__namespace__'=141 +'__class__'=142 +'__trait__'=143 +'__function__'=144 +'__method__'=145 +'__line__'=146 +'__file__'=147 +'__dir__'=148 +'<:'=149 +':>'=150 +'=>'=151 +'++'=152 +'--'=153 +'==='=154 +'!=='=155 +'=='=156 +'<='=158 +'>='=159 +'+='=160 +'-='=161 +'*='=162 +'**'=163 +'**='=164 +'/='=165 +'.='=166 +'%='=167 +'<<='=168 +'>>='=169 +'&='=170 +'|='=171 +'^='=172 +'||'=173 +'&&'=174 +'<<'=175 +'>>'=176 +'::'=177 +'->'=178 +'\\'=179 +'...'=180 +'&'=183 +'|'=184 +'!'=185 +'^'=186 +'+'=187 +'-'=188 +'*'=189 +'%'=190 +'~'=192 +'@'=193 +'.'=195 +'?'=196 +'('=197 +')'=198 +'['=199 +']'=200 +'}'=202 +','=203 +':'=204 +';'=205 +'\''=207 +'`'=208 diff --git a/gen.antlr4-php/src/main/antlr/PHPParser.g4 b/gen.antlr4-php/src/main/antlr/PHPParser.g4 new file mode 100644 index 000000000..6238cf2de 
--- /dev/null +++ b/gen.antlr4-php/src/main/antlr/PHPParser.g4 @@ -0,0 +1,895 @@ +/* +PHP grammar. +The MIT License (MIT). +Copyright (c) 2015-2016, Ivan Kochurkin (kvanttt@gmail.com), Positive Technologies. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +parser grammar PHPParser; + +@header { + package com.github.gumtreediff.gen.php71; +} + +options { tokenVocab=PHPLexer; } + +// HTML +// Also see here: https://github.com/antlr/grammars-v4/tree/master/html + +htmlDocument + : Shebang? 
htmlElementOrPhpBlock* EOF + ; + +htmlElementOrPhpBlock + : htmlElement + | phpBlock + | scriptTextPart + ; + +htmlElement // exactly one HTML-side token per match; markup is not structured any further + : HtmlDtd | HtmlScriptOpen | HtmlClose | HtmlStyleOpen | ScriptClose | StyleBody | HtmlOpen | HtmlName | '/>' | HtmlSlash | HtmlText + | HtmlEquals | HtmlStartQuoteString | HtmlEndQuoteString | HtmlStartDoubleQuoteString | HtmlEndDoubleQuoteString | HtmlHex | HtmlDecimal + | HtmlQuoteString | HtmlDoubleQuoteString + ; + +// Script +// Parse JavaScript with https://github.com/antlr/grammars-v4/tree/master/ecmascript if necessary. + +scriptTextPart + : ScriptText+ + ; + +// PHP + +phpBlock + : importStatement* topStatement+ + ; + +importStatement + : Import Namespace namespaceNameList ';' + ; + +topStatement + : emptyStatement + | nonEmptyStatement + | useDeclaration + | namespaceDeclaration + | functionDeclaration + | classDeclaration + | globalConstantDeclaration + ; + +useDeclaration + : Use (Function | Const)? useDeclarationContentList ';' + ; + +useDeclarationContentList + : '\\'? useDeclarationContent (',' '\\'? useDeclarationContent)* + ; + +useDeclarationContent + : namespaceNameList (As identifier)? + ; + +namespaceDeclaration + : Namespace (namespaceNameList? OpenCurlyBracket namespaceStatement* '}' | namespaceNameList ';') + ; + +namespaceStatement + : nonEmptyStatement + | useDeclaration + | functionDeclaration + | classDeclaration + | globalConstantDeclaration + ; + +functionDeclaration + : attributes Function '&'? identifier typeParameterListInBrackets? '(' formalParameterList ')' blockStatement + ; + +classDeclaration + : attributes Private? modifier? Partial? ( + classEntryType identifier typeParameterListInBrackets? (Extends qualifiedStaticTypeRef)? (Implements interfaceList)? + | Interface identifier typeParameterListInBrackets? (Extends interfaceList)? 
) + OpenCurlyBracket classStatement* '}' + ; + +classEntryType + : Class + | Trait + ; + +interfaceList + : qualifiedStaticTypeRef (',' qualifiedStaticTypeRef)* + ; + +typeParameterListInBrackets + : '<:' typeParameterList ':>' + | '<:' typeParameterWithDefaultsList ':>' + | '<:' typeParameterList ',' typeParameterWithDefaultsList ':>' + ; + +typeParameterList + : typeParameterDecl (',' typeParameterDecl)* + ; + +typeParameterWithDefaultsList + : typeParameterWithDefaultDecl (',' typeParameterWithDefaultDecl)* + ; + +typeParameterDecl + : attributes identifier + ; + +typeParameterWithDefaultDecl + : attributes identifier Eq (qualifiedStaticTypeRef | primitiveType) + ; + +genericDynamicArgs + : '<:' typeRef (',' typeRef)* ':>' + ; + +attributes + : attributesGroup* + ; + +attributesGroup + : '[' (identifier ':')? attribute (',' attribute)* ']' + ; + +attribute + : qualifiedNamespaceName + | qualifiedNamespaceName '(' attributeArgList ')' + | qualifiedNamespaceName '(' attributeNamedArgList ')' + | qualifiedNamespaceName '(' attributeArgList ',' attributeNamedArgList ')' + ; + +attributeArgList + : expression (',' expression)* + ; + +attributeNamedArgList + : attributeNamedArg (',' attributeNamedArg)* + ; + +attributeNamedArg + : VarName '=>' expression + ; + +innerStatementList + : innerStatement* + ; + +innerStatement + : statement + | functionDeclaration + | classDeclaration + ; + +// Statements + +statement + : nonEmptyStatement + | emptyStatement + ; + +emptyStatement + : ';' + ; + +nonEmptyStatement + : identifier ':' + | blockStatement + | ifStatement + | whileStatement + | doWhileStatement + | forStatement + | switchStatement + | breakStatement + | continueStatement + | returnStatement + | yieldExpression ';' + | globalStatement + | staticVariableStatement + | echoStatement + | expressionStatement + | unsetStatement + | foreachStatement + | tryCatchFinally + | throwStatement + | gotoStatement + | declareStatement + | inlineHtml + ; + +blockStatement + : 
OpenCurlyBracket innerStatementList '}' + ; + +ifStatement + : If parenthesis statement elseIfStatement* elseStatement? + | If parenthesis ':' innerStatementList elseIfColonStatement* elseColonStatement? EndIf ';' + ; + +elseIfStatement + : ElseIf parenthesis statement + ; + +elseIfColonStatement + : ElseIf parenthesis ':' innerStatementList + ; + +elseStatement + : Else statement + ; + +elseColonStatement + : Else ':' innerStatementList + ; + +whileStatement + : While parenthesis (statement | ':' innerStatementList EndWhile ';') + ; + +doWhileStatement + : Do statement While parenthesis ';' + ; + +forStatement + : For '(' forInit? ';' expressionList? ';' forUpdate? ')' (statement | ':' innerStatementList EndFor ';' ) + ; + +forInit + : expressionList + ; + +forUpdate + : expressionList + ; + +switchStatement + : Switch parenthesis (OpenCurlyBracket ';'? switchBlock* '}' | ':' ';'? switchBlock* EndSwitch ';') + ; + +switchBlock + : ((Case expression | Default) ( ':' | ';' ))+ innerStatementList + ; + +breakStatement + : Break expression? ';' + ; + +continueStatement + : Continue expression? ';' + ; + +returnStatement + : Return expression? ';' + ; + +expressionStatement + : expression ';' + ; + +unsetStatement + : Unset '(' chainList ')' ';' + ; + +foreachStatement + : Foreach + ( '(' chain As '&'? chain ('=>' '&'? chain)? ')' + | '(' expression As chain ('=>' '&'? chain)? ')' + | '(' chain As List '(' assignmentList ')' ')' ) + (statement | ':' innerStatementList EndForeach ';') + ; + +tryCatchFinally + : Try blockStatement (catchClause+ finallyStatement? 
| catchClause* finallyStatement) + ; + +catchClause + : Catch '(' qualifiedStaticTypeRef VarName ')' blockStatement + ; + +finallyStatement + : Finally blockStatement + ; + +throwStatement + : Throw expression ';' + ; + +gotoStatement + : Goto identifier ';' + ; + +declareStatement + : Declare '(' declareList ')' (statement | ':' innerStatementList EndDeclare ';') + ; + +inlineHtml + : (htmlElement | scriptTextPart)+ //htmlElementOrPhpBlock* + ; + +declareList + : identifierInititalizer (',' identifierInititalizer)* + ; + +formalParameterList + : formalParameter? (',' formalParameter)* + ; + +formalParameter + : attributes typeHint? '&'? '...'? variableInitializer + ; + +typeHint + : qualifiedStaticTypeRef + | Callable + | primitiveType + ; + +globalStatement + : Global globalVar (',' globalVar)* ';' + ; + +globalVar + : VarName + | Dollar chain + | Dollar OpenCurlyBracket expression '}' + ; + +echoStatement + : Echo expressionList ';' + ; + +staticVariableStatement + : Static variableInitializer (',' variableInitializer)* ';' + ; + +classStatement + : attributes propertyModifiers variableInitializer (',' variableInitializer)* ';' + | attributes Const identifierInititalizer (',' identifierInititalizer)* ';' + | attributes memberModifiers? Function '&'? identifier + typeParameterListInBrackets? '(' formalParameterList ')' baseCtorCall? methodBody + | Use qualifiedNamespaceNameList traitAdaptations + ; + +traitAdaptations + : ';' + | OpenCurlyBracket traitAdaptationStatement* '}' + ; + +traitAdaptationStatement + : traitPrecedence + | traitAlias + ; + +traitPrecedence + : qualifiedNamespaceName '::' identifier InsteadOf qualifiedNamespaceNameList ';' + ; + +traitAlias + : traitMethodReference As (memberModifier | memberModifier? identifier) ';' + ; + +traitMethodReference + : (qualifiedNamespaceName '::')? 
identifier
+    ;
+
+baseCtorCall  // ':' followed by a name and an argument list
+    : ':' identifier arguments
+    ;
+
+methodBody  // a bare ';' or a block
+    : ';'
+    | blockStatement
+    ;
+
+propertyModifiers
+    : memberModifiers
+    | Var
+    ;
+
+memberModifiers  // one or more memberModifier
+    : memberModifier+
+    ;
+
+variableInitializer  // VarName with an optional "Eq constantInititalizer" part
+    : VarName (Eq constantInititalizer)?
+    ;
+
+identifierInititalizer
+    : identifier Eq constantInititalizer
+    ;
+
+globalConstantDeclaration
+    : attributes Const identifierInititalizer (',' identifierInititalizer)* ';'
+    ;
+
+expressionList
+    : expression (',' expression)*
+    ;
+
+parenthesis
+    : '(' (expression | yieldExpression) ')'
+    ;
+
+// Expressions (one rule per precedence group, loosest group first; each rule refers to the next tighter one)
+// Grouped by priorities: http://php.net/manual/en/language.operators.precedence.php
+// and http://www.phpeveryday.com/articles/PHP-Operators-Operator-Priority-P312.html
+expression  // QuestionMark ... ':' form and the LogicalAnd / LogicalXor / LogicalOr operators
+    : andOrExpression
+    | expression QuestionMark expression? ':' andOrExpression
+    | expression LogicalAnd andOrExpression
+    | expression LogicalXor andOrExpression
+    | expression LogicalOr andOrExpression
+    ;
+
+andOrExpression  // '&', '^', '|', '&&' and '||'
+    : comparisonExpression
+    | andOrExpression '&' comparisonExpression
+    | andOrExpression '^' comparisonExpression
+    | andOrExpression '|' comparisonExpression
+    | andOrExpression '&&' comparisonExpression
+    | andOrExpression '||' comparisonExpression
+    ;
+
+comparisonExpression  // shift, relational and equality operators
+    : additionExpression
+    | comparisonExpression ('<<' | '>>') additionExpression
+    | comparisonExpression (Less | '<=' | Greater | '>=') additionExpression
+    | comparisonExpression ('===' | '!==' | '==' | IsNotEq) additionExpression
+    ;
+
+additionExpression  // '+', '-' and '.'
+    : multiplicationExpression
+    | additionExpression ('+' | '-' | '.') multiplicationExpression
+    ;
+
+multiplicationExpression  // '**' (right-recursive), InstanceOf, '*', Divide and '%'
+    : notLeftRecursionExpression
+    | notLeftRecursionExpression '**' multiplicationExpression
+    | multiplicationExpression InstanceOf typeRef
+    | multiplicationExpression ('*' | Divide | '%') notLeftRecursionExpression
+    ;
+
+notLeftRecursionExpression  // no alternative starts with a reference to this rule; each one carries a #label
+    : Clone expression  #CloneExpression
+    | newExpr  #NewExpression
+
+    | stringConstant '[' expression ']'  #IndexerExpression
+
+    | '(' castOperation ')' expression  #CastExpression
+    | ('~' | '@') expression  #UnaryOperatorExpression
+
+    | ('!' | '+' | '-') expression  #UnaryOperatorExpression
+
+    | ('++' | '--') chain  #PrefixIncDecExpression
+    | chain ('++' | '--')  #PostfixIncDecExpression
+
+    | chain assignmentOperator expression  #AssignmentExpression
+    | chain Eq '&' (chain | newExpr)  #AssignmentExpression
+
+    | Print expression  #PrintExpression
+
+    | chain  #ChainExpression
+    | constant  #ScalarExpression
+    | string  #ScalarExpression
+    | Label  #ScalarExpression
+
+    | BackQuoteString  #BackQuoteStringExpression
+    | parenthesis  #ParenthesisExpression
+    | (Array '(' arrayItemList? ')' | '[' arrayItemList? ']') ('[' expression ']')?  #ArrayCreationExpression
+
+    | Yield  #SpecialWordExpression
+    | List '(' assignmentList ')' Eq expression  #SpecialWordExpression
+    | IsSet '(' chainList ')'  #SpecialWordExpression
+    | Empty '(' chain ')'  #SpecialWordExpression
+    | Eval '(' expression ')'  #SpecialWordExpression
+    | Exit ( '(' ')' | parenthesis )?  #SpecialWordExpression
+    | (Include | IncludeOnce) expression  #SpecialWordExpression
+    | (Require | RequireOnce) expression  #SpecialWordExpression
+
+    | Static? Function '&'? '(' formalParameterList ')' lambdaFunctionUseVars? blockStatement  #LambdaFunctionExpression
+    ;
+
+newExpr
+    : New typeRef arguments?
+    ;
+
+assignmentOperator  // Eq plus the compound assignment operators
+    : Eq
+    | '+='
+    | '-='
+    | '*='
+    | '**='
+    | '/='
+    | '.='
+    | '%='
+    | '&='
+    | '|='
+    | '^='
+    | '<<='
+    | '>>='
+    ;
+
+yieldExpression
+    : Yield expression ('=>' expression)?
+    ;
+
+arrayItemList  // comma-separated items; a trailing ',' is accepted
+    : arrayItem (',' arrayItem)* ','?
+    ;
+
+arrayItem
+    : expression ('=>' expression)?
+    | (expression '=>')? '&' chain
+    ;
+
+lambdaFunctionUseVars
+    : Use '(' lambdaFunctionUseVar (',' lambdaFunctionUseVar)* ')'
+    ;
+
+lambdaFunctionUseVar
+    : '&'? VarName
+    ;
+
+qualifiedStaticTypeRef
+    : qualifiedNamespaceName genericDynamicArgs?
+    | Static
+    ;
+
+typeRef
+    : (qualifiedNamespaceName | indirectTypeRef) genericDynamicArgs?
+    | primitiveType
+    | Static
+    ;
+
+indirectTypeRef
+    : chainBase ('->' keyedFieldName)*
+    ;
+
+qualifiedNamespaceName
+    : Namespace? '\\'? namespaceNameList
+    ;
+
+namespaceNameList  // identifiers separated by '\\'
+    : identifier ('\\' identifier)*
+    ;
+
+qualifiedNamespaceNameList
+    : qualifiedNamespaceName (',' qualifiedNamespaceName)*
+    ;
+
+arguments
+    : '(' ( actualArgument (',' actualArgument)* | yieldExpression)? ')'
+    ;
+
+actualArgument
+    : '...'? expression
+    | '&' chain
+    ;
+
+constantInititalizer  // sic: this spelling is the one the referencing rules use
+    : constant
+    | string
+    | Array '(' (constantArrayItemList ','?)? ')'
+    | '[' (constantArrayItemList ','?)? ']'
+    | ('+'|'-') constantInititalizer
+    ;
+
+constantArrayItemList
+    : constantArrayItem (',' constantArrayItem)*
+    ;
+
+constantArrayItem
+    : constantInititalizer ('=>' constantInititalizer)?
+    ;
+
+constant
+    : Null
+    | literalConstant
+    | magicConstant
+    | classConstant
+    | qualifiedNamespaceName
+    ;
+
+literalConstant
+    : Real
+    | BooleanConstant
+    | numericConstant
+    | stringConstant
+    ;
+
+numericConstant
+    : Octal
+    | Decimal
+    | Hex
+    | Binary
+    ;
+
+classConstant
+    : (Class | Parent_) '::' (identifier | Constructor | Get | Set)
+    | (qualifiedStaticTypeRef | keyedVariable) '::' identifier
+    ;
+
+stringConstant
+    : Label
+    ;
+
+string
+    : StartHereDoc HereDocText+
+    | StartNowDoc HereDocText+
+    | SingleQuoteString
+    | DoubleQuote interpolatedStringPart* DoubleQuote
+    ;
+
+interpolatedStringPart
+    : StringPart
+    | chain
+    ;
+
+chainList
+    : chain (',' chain)*
+    ;
+
+chain  // a chainBase, functionCall or parenthesized newExpr, followed by any number of memberAccess
+    : (chainBase | functionCall | '(' newExpr ')') memberAccess*
+    ;
+
+memberAccess
+    : '->' keyedFieldName actualArguments?
+    ;
+
+functionCall
+    : functionCallName actualArguments
+    ;
+
+functionCallName
+    : qualifiedNamespaceName
+    | classConstant
+    | chainBase
+    ;
+
+actualArguments
+    : genericDynamicArgs? arguments squareCurlyExpression*
+    ;
+
+chainBase
+    : keyedVariable ('::' keyedVariable)?
+    | qualifiedStaticTypeRef '::' keyedVariable
+    ;
+
+keyedFieldName
+    : keyedSimpleFieldName
+    | keyedVariable
+    ;
+
+keyedSimpleFieldName
+    : (identifier | OpenCurlyBracket expression '}') squareCurlyExpression*
+    ;
+
+keyedVariable
+    : Dollar* (VarName | Dollar OpenCurlyBracket expression '}') squareCurlyExpression*
+    ;
+
+squareCurlyExpression  // '[' expression? ']' or a braced expression
+    : '[' expression? ']'
+    | OpenCurlyBracket expression '}'
+    ;
+
+assignmentList  // every element is optional, so this rule can match no token at all
+    : assignmentListElement? (',' assignmentListElement?)*
+    ;
+
+assignmentListElement
+    : chain
+    | List '(' assignmentList ')'
+    ;
+
+modifier
+    : Abstract
+    | Final
+    ;
+
+identifier  // Label, or any of the named tokens listed below
+    : Label
+
+    | Abstract
+    | Array
+    | As
+    | BinaryCast
+    | BoolType
+    | BooleanConstant
+    | Break
+    | Callable
+    | Case
+    | Catch
+    | Class
+    | Clone
+    | Const
+    | Continue
+    | Declare
+    | Default
+    | Do
+    | DoubleCast
+    | DoubleType
+    | Echo
+    | Else
+    | ElseIf
+    | Empty
+    | EndDeclare
+    | EndFor
+    | EndForeach
+    | EndIf
+    | EndSwitch
+    | EndWhile
+    | Eval
+    | Exit
+    | Extends
+    | Final
+    | Finally
+    | FloatCast
+    | For
+    | Foreach
+    | Function
+    | Global
+    | Goto
+    | If
+    | Implements
+    | Import
+    | Include
+    | IncludeOnce
+    | InstanceOf
+    | InsteadOf
+    | Int16Cast
+    | Int64Type
+    | Int8Cast
+    | Interface
+    | IntType
+    | IsSet
+    | List
+    | LogicalAnd
+    | LogicalOr
+    | LogicalXor
+    | Namespace
+    | New
+    | Null
+    | ObjectType
+    | Parent_
+    | Partial
+    | Print
+    | Private
+    | Protected
+    | Public
+    | Require
+    | RequireOnce
+    | Resource
+    | Return
+    | Static
+    | StringType
+    | Switch
+    | Throw
+    | Trait
+    | Try
+    | Typeof
+    | UintCast
+    | UnicodeCast
+    | Unset
+    | Use
+    | Var
+    | While
+    | Yield
+
+    | Get
+    | Set
+    | Call
+    | CallStatic
+    | Constructor
+    | Destruct
+    | Wakeup
+    | Sleep
+    | Autoload
+    | IsSet__
+    | Unset__
+    | ToString__
+    | Invoke
+    | SetState
+    | Clone__
+    | DebugInfo
+    | Namespace__
+    | Class__
+    | Traic__  // sic: token name as referenced here; presumably the __TRAIT__ token - confirm in the lexer
+    | Function__
+    | Method__
+    | Line__
+    | File__
+    | Dir__
+    ;
+
+memberModifier
+    : Public
+    | Protected
+    | Private
+    | Static
+    | Abstract
+    | Final
+    ;
+
+magicConstant
+    : Namespace__
+    | Class__
+    | Traic__  // sic: same token name as in the identifier rule
+    | Function__
+    | Method__
+    | Line__
+    | File__
+    | Dir__
+    ;
+
+magicMethod
+    : Get
+    | Set
+    | Call
+    | CallStatic
+    | Constructor
+    | Destruct
+    | Wakeup
+    | Sleep
+    | Autoload
+    | IsSet__
+    | Unset__
+    | ToString__
+    | Invoke
+    | SetState
+    | Clone__
+    | DebugInfo
+    ;
+
+primitiveType
+    : BoolType
+    | IntType
+    | Int64Type
+    | DoubleType
+    | StringType
+    | Resource
+    | ObjectType
+    | Array
+    ;
+
+castOperation  // tokens accepted between '(' and ')' by the #CastExpression alternative
+    : BoolType
+    | Int8Cast
+    | Int16Cast
+    | IntType
+    | Int64Type
+    | UintCast
+    | DoubleCast
+    | DoubleType
+    | FloatCast
+    | StringType
+    | BinaryCast
+    | UnicodeCast
+    | Array
+    | ObjectType
+    | Resource
+    | Unset
+    ;
diff --git a/gen.antlr4-php/src/main/java/com/github/gumtreediff/gen/php71/PhpTreeGenerator.java b/gen.antlr4-php/src/main/java/com/github/gumtreediff/gen/php71/PhpTreeGenerator.java
new file mode 100644
index 000000000..e8fefbe8f
--- /dev/null
+++ b/gen.antlr4-php/src/main/java/com/github/gumtreediff/gen/php71/PhpTreeGenerator.java
@@ -0,0 +1,111 @@
+/*
+ * This file is part of GumTree.
+ *
+ * GumTree is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * GumTree is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with GumTree. If not, see <http://www.gnu.org/licenses/>.
+ * + * Copyright 2011-2015 Jean-Rémy Falleri + * Copyright 2011-2015 Floréal Morandat + * Copyright 2017 Mikulas Dite + */ + +package com.github.gumtreediff.gen.php71; + +import com.github.gumtreediff.gen.Register; +import com.github.gumtreediff.gen.Registry; +import com.github.gumtreediff.gen.antlr4.AbstractAntlr4TreeGenerator; +import com.github.gumtreediff.tree.ITree; +import com.github.gumtreediff.tree.TreeContext; +import org.antlr.v4.runtime.*; +import org.antlr.v4.runtime.tree.ParseTree; +import org.antlr.v4.runtime.tree.ParseTreeWalker; +import org.antlr.v4.runtime.tree.TerminalNode; + +import java.io.IOException; +import java.io.Reader; +import java.util.HashMap; +import java.util.Map; + +@Register(id = "php-antlr", accept = "\\.php.?$") +public class PhpTreeGenerator extends AbstractAntlr4TreeGenerator { + + private CommonTokenStream tokens; + private HashMap rules = new HashMap(); + + @Override + protected ParseTree getTree(Reader r) throws RecognitionException, IOException { + ANTLRInputStream stream = new ANTLRInputStream(r); + PHPLexer l = new PHPLexer(stream); + + tokens = new CommonTokenStream(l); + PHPParser p = new PHPParser(tokens); + p.setBuildParseTree(true); + + // reverse name->index rule map + for (Map.Entry entry : p.getRuleIndexMap().entrySet()) { + rules.put(entry.getValue(), entry.getKey()); + } + + return p.phpBlock(); + } + + @Override + protected final String[] getTokenNames() { + return PHPParser.tokenNames; + } + + private ITree getTree(TreeContext context, ParseTree ct) { + int index; + String name = ct.getClass().getSimpleName(); + + if (ct instanceof ParserRuleContext) { + index = ((ParserRuleContext) ct).getRuleIndex(); + // Some simple rules extend rules without changing rule index, + // if that is the case, set node name to that parent. 
+ name = rules.get(index); + + } else { + assert ct instanceof TerminalNode; + index = 100000; // assumes there are not more rules in parser than this + } + + return context.createTree(index, null, name); + } + + @Override + protected void buildTree(TreeContext context, ITree root, ParseTree ct) { + int childrenCount = ct.getChildCount(); + + ITree tree = getTree(context, ct); + tree.setParentAndUpdateChildren(root); + + Token firstToken = tokens.get(ct.getSourceInterval().a); + Token lastToken = tokens.get(ct.getSourceInterval().b == -1 + ? ct.getSourceInterval().a : ct.getSourceInterval().b); + + tree.setPos(firstToken.getStartIndex()); + tree.setLength(lastToken.getStopIndex() - tree.getPos() + 1); // count last char + + if (ct instanceof TerminalNode) { + tree.setLabel(ct.getText()); + } else { + tree.setLabel(ct.getClass().getSimpleName()); + } + + for (int childIndex = 0; childIndex < childrenCount; childIndex++) { + ParseTree cct = ct.getChild(childIndex); + + buildTree(context, tree, cct); + } + } +} diff --git a/gen.antlr4/build.gradle b/gen.antlr4/build.gradle new file mode 100644 index 000000000..bc5d983f6 --- /dev/null +++ b/gen.antlr4/build.gradle @@ -0,0 +1 @@ +description = 'GumTree abstract AntLR4 module.' diff --git a/gen.antlr4/src/main/java/com/github/gumtreediff/gen/antlr4/AbstractAntlr4TreeGenerator.java b/gen.antlr4/src/main/java/com/github/gumtreediff/gen/antlr4/AbstractAntlr4TreeGenerator.java new file mode 100644 index 000000000..191ab9a21 --- /dev/null +++ b/gen.antlr4/src/main/java/com/github/gumtreediff/gen/antlr4/AbstractAntlr4TreeGenerator.java @@ -0,0 +1,68 @@ +/* + * This file is part of GumTree. + * + * GumTree is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * GumTree is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with GumTree. If not, see . + * + * Copyright 2011-2015 Jean-Rémy Falleri + * Copyright 2011-2015 Floréal Morandat + * Copyright 2017 Mikulas Dite + */ + +package com.github.gumtreediff.gen.antlr4; + +import com.github.gumtreediff.gen.TreeGenerator; +import com.github.gumtreediff.tree.ITree; +import com.github.gumtreediff.tree.TreeContext; +import org.antlr.v4.runtime.*; +import org.antlr.v4.runtime.tree.*; + +import java.io.IOException; +import java.io.Reader; +import java.util.ArrayDeque; +import java.util.Deque; +import java.util.Map; + +public abstract class AbstractAntlr4TreeGenerator extends TreeGenerator { + + public AbstractAntlr4TreeGenerator() { + } + + protected abstract ParseTree getTree(Reader r) throws RecognitionException, IOException; + + @Override + public TreeContext generate(Reader r) throws IOException { + ParseTree tree = getTree(r); + + TreeContext context = new TreeContext(); + ITree root = context.createTree(1, "label", "typeLabel"); + context.setRoot(root); + + try { + buildTree(context, root, tree); + + } catch (NullPointerException e) { + System.out.println(e.getMessage()); + e.printStackTrace(); + throw e; + } + + return context; + } + + protected abstract String[] getTokenNames(); + + @SuppressWarnings("unchecked") + abstract protected void buildTree(TreeContext context, ITree root, ParseTree ct); + +} diff --git a/settings.gradle b/settings.gradle index 53d6a6140..09665f980 100644 --- a/settings.gradle +++ b/settings.gradle @@ -9,9 +9,11 @@ include 'benchmark', 'gen.antlr-php', 'gen.antlr-r', 'gen.antlr-xml', + 'gen.antlr4', + 'gen.antlr4-php', 'gen.c', 'gen.css', 'gen.jdt', 'gen.js', 'gen.ruby', 
- 'gen.srcml' \ No newline at end of file + 'gen.srcml'