forked from JetBrains-Research/astminer
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
321 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,261 @@ | ||
/* Reworked for grammar specificity by Reid Mckenzie. Did a bunch of | ||
work so that rather than reading "a bunch of crap in parens" some | ||
syntactic information is preserved and recovered. Dec. 14 2014. | ||
Converted to ANTLR 4 by Terence Parr. Unsure of provenance. I see | ||
it committed by matthias.koester for clojure-eclipse project on | ||
Oct 5, 2009: | ||
https://code.google.com/p/clojure-eclipse/ | ||
Seems to me Laurent Petit had a version of this. I also see | ||
Jingguo Yao submitting a link to a now-dead github project on | ||
Jan 1, 2011. | ||
https://github.com/laurentpetit/ccw/tree/master/clojure-antlr-grammar | ||
Regardless, there are some issues perhaps related to "sugar"; | ||
I've tried to fix them. | ||
This parses https://github.com/weavejester/compojure project. | ||
I also note this is hardly a grammar; more like "match a bunch of | ||
crap in parens" but I guess that is LISP for you ;) | ||
*/ | ||
|
||
grammar Clojure;

// Parser rules
//--------------------------------------------------------------------

// Entry point: a whole source file is a sequence of forms.
file: form * EOF;

form: literal
    | list
    | vector
    | map
    | reader_macro
    ;

forms: form* ;

list: '(' forms ')' ;

vector: '[' forms ']' ;

// Map entries must come in key/value pairs.
map: '{' (form form)* '}' ;

set: '#{' forms '}' ;

reader_macro
    : lambda
    | meta_data
    | regex
    | var_quote
    | host_expr
    | set
    | tag
    | discard
    | dispatch
    | deref
    | quote
    | backtick
    | unquote
    | unquote_splicing
    | gensym
    ;

quote
    : '\'' form
    ;

backtick
    : '`' form
    ;

unquote
    : '~' form
    ;

unquote_splicing
    : '~@' form
    ;

tag
    : '^' form form
    ;

deref
    : '@' form
    ;

gensym
    : SYMBOL '#'
    ;

lambda
    : '#(' form* ')'
    ;

meta_data
    : '#^' (map form | form)
    ;

var_quote
    : '#\'' symbol
    ;

host_expr
    : '#+' form form
    ;

discard
    : '#_' form
    ;

dispatch
    : '#' symbol form
    ;

regex
    : '#' string
    ;

literal
    : string
    | number
    | character
    | nil
    | BOOLEAN
    | keyword
    | symbol
    | param_name
    ;

string: STRING;
hex: HEX;
bin: BIN;
bign: BIGN;
number
    : FLOAT
    | hex
    | bin
    | bign
    | LONG
    ;

character
    : named_char
    | u_hex_quad
    | any_char
    ;
named_char: CHAR_NAMED ;
any_char: CHAR_ANY ;
u_hex_quad: CHAR_U ;

nil: NIL;

keyword: macro_keyword | simple_keyword;
simple_keyword: ':' symbol;
macro_keyword: ':' ':' symbol;

symbol: ns_symbol | simple_sym;
simple_sym: SYMBOL;
ns_symbol: NS_SYMBOL;

param_name: PARAM_NAME;

// Lexers
//--------------------------------------------------------------------

// A backslash escapes the character that follows it. The backslash is
// excluded from the "ordinary character" alternative so that an escaped
// backslash at the end of a string ("\\") cannot be re-read as the start
// of an escaped quote, which would make the token run on to a later quote.
STRING : '"' ( ~["\\] | '\\' . )* '"' ;

// FIXME: Doesn't deal with arbitrary read radixes, BigNums
FLOAT
    : '-'? [0-9]+ FLOAT_TAIL
    | '-'? 'Infinity'
    | '-'? 'NaN'
    ;

fragment
FLOAT_TAIL
    : FLOAT_DECIMAL FLOAT_EXP
    | FLOAT_DECIMAL
    | FLOAT_EXP
    ;

fragment
FLOAT_DECIMAL
    : '.' [0-9]+
    ;

// The exponent may carry an explicit sign of either polarity (1e+10, 1e-10).
fragment
FLOAT_EXP
    : [eE] ('+' | '-')? [0-9]+
    ;
fragment
HEXD: [0-9a-fA-F] ;
HEX: '0' [xX] HEXD+ ;
BIN: '0' [bB] [10]+ ;
LONG: '-'? [0-9]+[lL]?;
BIGN: '-'? [0-9]+[nN];

// Unicode escape: exactly four hex digits. Every digit uses the full hex
// range so that escapes such as \uABCD are recognised as one character token.
CHAR_U
    : '\\' 'u' HEXD HEXD HEXD HEXD ;
CHAR_NAMED
    : '\\' ( 'newline'
           | 'return'
           | 'space'
           | 'tab'
           | 'formfeed'
           | 'backspace' ) ;
CHAR_ANY
    : '\\' . ;

// NIL and BOOLEAN are declared before SYMBOL so that, on an equal-length
// match, the keyword token is chosen over a plain symbol.
NIL : 'nil';

BOOLEAN : 'true' | 'false' ;

SYMBOL
    : '.'
    | '/'
    | NAME
    ;

NS_SYMBOL
    : NAME '/' SYMBOL
    ;

// Anonymous-function parameter: %, %1, %2, ... or %&.
// '&' gathers a variable number of arguments (added by TJP).
PARAM_NAME: '%' ((('1'..'9')('0'..'9')*)|'&')? ;

// Fragments
//--------------------------------------------------------------------

fragment
NAME: SYMBOL_HEAD SYMBOL_REST* (':' SYMBOL_REST+)* ;

// ';' is excluded so that a comment is never lexed as a symbol: without the
// exclusion a comment containing no whitespace (e.g. a line holding only
// ";;") matches SYMBOL and TRASH with equal length, and SYMBOL wins because
// it is declared first.
fragment
SYMBOL_HEAD
    : ~('0' .. '9'
        | '^' | '`' | '\'' | '"' | '#' | '~' | '@' | ':' | '/' | '%' | '(' | ')' | '[' | ']' | '{' | '}' | ';' // FIXME: could be one group
        | [ \n\r\t,] // FIXME: could be WS
        )
    ;

fragment
SYMBOL_REST
    : SYMBOL_HEAD
    | '0'..'9'
    | '.'
    ;

// Discard
//--------------------------------------------------------------------

// Commas count as whitespace.
fragment
WS : [ \n\r\t,] ;

fragment
COMMENT: ';' ~[\r\n]* ;

// Whitespace and comments are kept on the hidden channel, so the parser
// never sees them.
TRASH
    : ( WS | COMMENT ) -> channel(HIDDEN)
    ;
27 changes: 27 additions & 0 deletions
27
src/main/kotlin/astminer/parse/antlr/clojure/ClojureParser.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
package astminer.parse.antlr.clojure | ||
|
||
import me.vovak.antlr.parser.ClojureLexer | ||
import me.vovak.antlr.parser.ClojureParser | ||
import astminer.common.model.Parser | ||
import astminer.parse.antlr.SimpleNode | ||
import astminer.parse.antlr.convertAntlrTree | ||
import org.antlr.v4.runtime.CharStreams | ||
import org.antlr.v4.runtime.CommonTokenStream | ||
import java.io.InputStream | ||
import java.lang.Exception | ||
|
||
/**
 * [Parser] implementation that turns Clojure source text into a [SimpleNode] tree
 * by running the ANTLR-generated Clojure lexer and parser.
 *
 * Note: this class shares its simple name with the imported generated parser
 * `me.vovak.antlr.parser.ClojureParser`; inside this file the unqualified name
 * is used for the generated parser (constructor taking a token stream, `file()`,
 * `ruleNames`, `VOCABULARY`).
 */
class ClojureParser : Parser<SimpleNode> {
    /**
     * Parses [content] starting from the grammar's `file` rule.
     *
     * The error listeners of both the lexer and the parser are removed before
     * parsing, so syntax errors are not reported through them.
     *
     * @param content stream holding the Clojure source text
     * @return the tree built by [convertAntlrTree], or `null` if any [Exception]
     * is thrown while reading, lexing, parsing or converting the input
     */
    override fun parse(content: InputStream): SimpleNode? =
        try {
            val lexer = ClojureLexer(CharStreams.fromStream(content))
            lexer.removeErrorListeners()

            val parser = ClojureParser(CommonTokenStream(lexer))
            parser.removeErrorListeners()

            convertAntlrTree(parser.file(), ClojureParser.ruleNames, ClojureParser.VOCABULARY)
        } catch (e: Exception) {
            // Best-effort contract: a failure is reported to the caller as a null tree.
            null
        }
}
27 changes: 27 additions & 0 deletions
27
src/test/kotlin/astminer/parse/antlr/clojure/ANTLRClojureParserTest.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
package astminer.parse.antlr.clojure | ||
|
||
import org.junit.Assert | ||
import org.junit.Test | ||
import java.io.File | ||
import java.io.FileInputStream | ||
|
||
/**
 * Smoke tests for [ClojureParser]: they only check that parsing the sample
 * Clojure sources under `testData/examples` yields non-null trees.
 */
class ANTLRClojureParserTest {

    /** Parsing a single sample file must produce a tree. */
    @Test
    fun testNodeIsNotNull() {
        val sampleFile = File("testData/examples/1.clj")
        val tree = ClojureParser().parse(FileInputStream(sampleFile))

        Assert.assertNotNull("Parse tree for a valid file should not be null", tree)
    }

    /** Parsing the examples folder by extension must find exactly one `.clj` file and parse it. */
    @Test
    fun testProjectParsing() {
        val examplesDir = File("testData/examples")
        val parsedTrees = ClojureParser().parseWithExtension(examplesDir, "clj")

        Assert.assertEquals(
            "There is only 1 file with .clj extension in 'testData/examples' folder",
            1,
            parsedTrees.size
        )
        for (tree in parsedTrees) {
            Assert.assertNotNull("Parse tree for a valid file should not be null", tree)
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
;; Minimal sample program: one namespace with a single function.
(ns helloworld.core)

;; -main takes no arguments and prints a greeting.
(defn -main
  "I can say 'Hello World'."
  []
  (println "Hello, World!"))