Skip to content

Commit

Permalink
adding clojure antlr parts
Browse files Browse the repository at this point in the history
  • Loading branch information
FiV0 committed Nov 16, 2019
1 parent 430bbc2 commit a058091
Show file tree
Hide file tree
Showing 4 changed files with 321 additions and 0 deletions.
261 changes: 261 additions & 0 deletions src/main/antlr/Clojure.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,261 @@
/* Reworked for grammar specificity by Reid Mckenzie. Did a bunch of
work so that rather than reading "a bunch of crap in parens" some
syntactic information is preserved and recovered. Dec. 14 2014.
Converted to ANTLR 4 by Terence Parr. Unsure of provenance. I see
it committed by matthias.koester for clojure-eclipse project on
Oct 5, 2009:
https://code.google.com/p/clojure-eclipse/
Seems to me Laurent Petit had a version of this. I also see
Jingguo Yao submitting a link to a now-dead github project on
Jan 1, 2011.
https://github.com/laurentpetit/ccw/tree/master/clojure-antlr-grammar
Regardless, there are some issues perhaps related to "sugar";
I've tried to fix them.
This parses https://github.com/weavejester/compojure project.
I also note this is hardly a grammar; more like "match a bunch of
crap in parens" but I guess that is LISP for you ;)
*/

grammar Clojure;

// Entry rule: a whole source file is zero or more top-level forms followed
// by end of input.
file: form * EOF;

// A single form: a literal, one of the three bracketed collections below,
// or a reader macro (which also covers set literals, see `reader_macro`).
form: literal
| list
| vector
| map
| reader_macro
;

// Zero or more forms; shared body of list, vector and set.
forms: form* ;

// Parenthesised sequence of forms, e.g. `(f a b)`.
list: '(' forms ')' ;

// Square-bracketed sequence of forms, e.g. `[a b c]`.
vector: '[' forms ']' ;

// Brace-delimited map literal; forms must come in pairs (key then value).
map: '{' (form form)* '}' ;

// Set literal opened by `#{`; reached through `reader_macro`, not `form`.
set: '#{' forms '}' ;

// Union of every prefix-introduced (reader macro) construct recognised by
// this grammar. Each alternative is defined as its own rule further down,
// except `set`, which is defined next to the other collection rules.
// The order of alternatives is kept as-is: ANTLR prefers the earlier
// alternative when more than one could match.
reader_macro
: lambda
| meta_data
| regex
| var_quote
| host_expr
| set
| tag
| discard
| dispatch
| deref
| quote
| backtick
| unquote
| unquote_splicing
| gensym
;

// TJP added '&' (gather a variable number of arguments)
// NOTE(review): none of the rules below contain '&'; the only '&' in this
// grammar is in the PARAM_NAME lexer rule, so the remark above presumably
// refers to that rule rather than to `quote`.

// `'form`
quote
: '\'' form
;

// `` `form ``
backtick
: '`' form
;

// `~form`
unquote
: '~' form
;

// `~@form`
unquote_splicing
: '~@' form
;

// `^` followed by two forms, e.g. `^String s`.
tag
: '^' form form
;

// `@form`
deref
: '@' form
;

// A SYMBOL token followed by `#`, e.g. `x#`.
gensym
: SYMBOL '#'
;

// `#( ... )` wrapping zero or more forms.
lambda
: '#(' form* ')'
;

// `#^` followed either by a map and a form, or by a single form.
meta_data
: '#^' (map form | form)
;

// `#'` followed by a symbol, e.g. `#'foo`.
var_quote
: '#\'' symbol
;

// `#+` followed by two forms.
host_expr
: '#+' form form
;

// `#_` followed by one form.
discard
: '#_' form
;

// `#` followed by a symbol and then a form, e.g. `#inst "..."`.
dispatch
: '#' symbol form
;

// `#` followed by a string, e.g. `#"[a-z]+"`.
regex
: '#' string
;

// Atomic (non-collection, non-reader-macro) forms.
literal
: string
| number
| character
| nil
| BOOLEAN
| keyword
| symbol
| param_name
;

// Thin parser-rule wrappers around single lexer tokens, so that each token
// kind gets its own named node in the parse tree.
string: STRING;
hex: HEX;
bin: BIN;
bign: BIGN;
// Any numeric literal recognised by the lexer.
number
: FLOAT
| hex
| bin
| bign
| LONG
;

// Character literal: a named character, a \uXXXX escape, or a backslash
// followed by any single character.
character
: named_char
| u_hex_quad
| any_char
;
named_char: CHAR_NAMED ;
any_char: CHAR_ANY ;
u_hex_quad: CHAR_U ;

nil: NIL;

// Keywords: `:sym` (simple_keyword) or `::sym` (macro_keyword).
keyword: macro_keyword | simple_keyword;
simple_keyword: ':' symbol;
macro_keyword: ':' ':' symbol;

// A symbol is either namespace-qualified (NS_SYMBOL) or plain (SYMBOL).
symbol: ns_symbol | simple_sym;
simple_sym: SYMBOL;
ns_symbol: NS_SYMBOL;

// `%`-style parameter name token (see PARAM_NAME).
param_name: PARAM_NAME;

// Lexers
//--------------------------------------------------------------------

// String literal: a double quote, any run of ordinary characters and
// backslash escapes, then a closing double quote.
// A backslash always consumes the character that follows it, so `\"` never
// terminates the string and the second backslash of `\\` cannot be re-read
// as the start of an escaped quote (which would make a string ending in
// `\\` swallow the closing quote and run on to the next `"` in the file).
STRING : '"' ( ~["\\] | '\\' . )* '"' ;

// FIXME: Doesn't deal with arbitrary read radixes, BigNums
// Floating-point literal: optionally negative digits followed by a
// FLOAT_TAIL (fraction and/or exponent), or the words `Infinity` / `NaN`
// with an optional leading minus.
FLOAT
: '-'? [0-9]+ FLOAT_TAIL
| '-'? 'Infinity'
| '-'? 'NaN'
;

// What must follow the integer digits for a number to count as a FLOAT:
// a fraction, an exponent, or a fraction followed by an exponent.
fragment
FLOAT_TAIL
: FLOAT_DECIMAL FLOAT_EXP
| FLOAT_DECIMAL
| FLOAT_EXP
;

// Fractional part: a dot followed by at least one digit.
fragment
FLOAT_DECIMAL
: '.' [0-9]+
;

// Exponent part of a float: `e` or `E`, an optional sign, then at least one
// digit. Both `-` and an explicit `+` are accepted, so `1e+5` lexes as a
// single FLOAT rather than splitting into a number and a symbol.
fragment
FLOAT_EXP
: [eE] ('+' | '-')? [0-9]+
;
// A single hexadecimal digit (either case).
fragment
HEXD: [0-9a-fA-F] ;
// Hexadecimal integer: `0x` / `0X` followed by one or more hex digits.
HEX: '0' [xX] HEXD+ ;
// Binary integer: `0b` / `0B` followed by one or more 0/1 digits.
BIN: '0' [bB] [10]+ ;
// Decimal integer with optional leading minus and optional `l`/`L` suffix.
LONG: '-'? [0-9]+[lL]?;
// Decimal integer with optional leading minus and a mandatory `n`/`N` suffix.
BIGN: '-'? [0-9]+[nN];

// Unicode-escape character literal: backslash, `u`, then exactly four hex
// digits. Every position accepts the full hex range (0-9, a-f, A-F), so
// literals such as `\uABCD` or `\ubeef` are recognised as one character
// token.
CHAR_U
: '\\' 'u' HEXD HEXD HEXD HEXD ;
// Named character literal: backslash followed by one of the fixed names
// listed here.
CHAR_NAMED
: '\\' ( 'newline'
| 'return'
| 'space'
| 'tab'
| 'formfeed'
| 'backspace' ) ;
// Fallback character literal: backslash followed by any single character.
CHAR_ANY
: '\\' . ;

// NIL and BOOLEAN are declared before SYMBOL: when two lexer rules match
// the same length of input, ANTLR picks the rule defined first, so the
// exact words `nil`, `true` and `false` become these tokens, not symbols.
NIL : 'nil';

BOOLEAN : 'true' | 'false' ;

// Unqualified symbol: a lone `.`, a lone `/`, or a NAME.
SYMBOL
: '.'
| '/'
| NAME
;

// Namespace-qualified symbol: NAME, a slash, then a SYMBOL.
NS_SYMBOL
: NAME '/' SYMBOL
;

// `%` parameter name: bare `%`, `%` followed by a positive integer with no
// leading zero (e.g. `%1`, `%12`), or `%&`.
PARAM_NAME: '%' ((('1'..'9')('0'..'9')*)|'&')? ;

// Fragments
//--------------------------------------------------------------------

// Body of a symbol name: one SYMBOL_HEAD character, any number of
// SYMBOL_REST characters, then zero or more groups consisting of a colon
// followed by at least one SYMBOL_REST character (so a name may contain,
// but not start or end with, `:`).
fragment
NAME: SYMBOL_HEAD SYMBOL_REST* (':' SYMBOL_REST+)* ;

// First character of a symbol name: any character that is not a digit, not
// one of the listed reader/delimiter characters, and not whitespace or a
// comma.
// `;` is excluded so that a comment never lexes as a symbol: without this,
// a comment such as `;TODO` ties in length with the comment rule and the
// earlier-declared SYMBOL rule wins. `\` is excluded so that adjacent
// character literals such as `\a\b` lex as two characters instead of one
// symbol.
fragment
SYMBOL_HEAD
: ~('0' .. '9'
| '^' | '`' | '\'' | '"' | '#' | '~' | '@' | ':' | '/' | '%' | '(' | ')' | '[' | ']' | '{' | '}' | ';' | '\\' // FIXME: could be one group
| [ \n\r\t,] // FIXME: could be WS
)
;

// Any non-initial character of a symbol name: everything SYMBOL_HEAD
// allows, plus digits and `.`.
fragment
SYMBOL_REST
: SYMBOL_HEAD
| '0'..'9'
| '.'
;

// Discard
//--------------------------------------------------------------------

// A single whitespace character; commas are included in this set.
fragment
WS : [ \n\r\t,] ;

// Line comment: `;` up to (but not including) the end of the line.
fragment
COMMENT: ';' ~[\r\n]* ;

// Whitespace and comments are routed to the HIDDEN channel, so they stay in
// the token stream but are not seen by the parser rules above.
TRASH
: ( WS | COMMENT ) -> channel(HIDDEN)
;
27 changes: 27 additions & 0 deletions src/main/kotlin/astminer/parse/antlr/clojure/ClojureParser.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package astminer.parse.antlr.clojure

import me.vovak.antlr.parser.ClojureLexer
import me.vovak.antlr.parser.ClojureParser
import astminer.common.model.Parser
import astminer.parse.antlr.SimpleNode
import astminer.parse.antlr.convertAntlrTree
import org.antlr.v4.runtime.CharStreams
import org.antlr.v4.runtime.CommonTokenStream
import java.io.InputStream
import java.lang.Exception

/**
 * [Parser] implementation that turns Clojure source text into a [SimpleNode]
 * tree using the ANTLR-generated Clojure lexer and parser.
 *
 * NOTE(review): inside this file the simple name `ClojureParser` is also
 * explicitly imported from `me.vovak.antlr.parser`; the references in
 * [parse] are expected to resolve to that generated class, not to this one.
 */
class ClojureParser : Parser<SimpleNode> {
    /**
     * Parses [content] starting from the grammar's `file` rule and converts
     * the resulting ANTLR tree with [convertAntlrTree].
     *
     * Error listeners are removed from both the lexer and the parser before
     * parsing (presumably to silence ANTLR's default error reporting).
     *
     * @param content stream holding the Clojure source to parse
     * @return the converted tree, or `null` if any [Exception] is thrown
     *   while reading, lexing, parsing or converting
     */
    override fun parse(content: InputStream): SimpleNode? {
        try {
            val clojureLexer = ClojureLexer(CharStreams.fromStream(content))
                .also { it.removeErrorListeners() }
            val antlrParser = ClojureParser(CommonTokenStream(clojureLexer))
                .also { it.removeErrorListeners() }
            val root = antlrParser.file()
            return convertAntlrTree(root, ClojureParser.ruleNames, ClojureParser.VOCABULARY)
        } catch (e: Exception) {
            // Best-effort contract: any failure is reported to the caller as null.
            return null
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package astminer.parse.antlr.clojure

import org.junit.Assert
import org.junit.Test
import java.io.File
import java.io.FileInputStream

/**
 * Smoke tests for [ClojureParser] run against the sample sources under
 * `testData/examples`.
 */
class ANTLRClojureParserTest {

    /** Parsing the single sample file must yield a non-null tree. */
    @Test
    fun testNodeIsNotNull() {
        val clojureParser = ClojureParser()
        val sample = File("testData/examples/1.clj")

        val tree = clojureParser.parse(FileInputStream(sample))

        Assert.assertNotNull("Parse tree for a valid file should not be null", tree)
    }

    /**
     * Parsing the examples directory by extension must pick up exactly one
     * `.clj` file, and every returned tree must be non-null.
     */
    @Test
    fun testProjectParsing() {
        val clojureParser = ClojureParser()
        val examplesDir = File("testData/examples")

        val parsed = clojureParser.parseWithExtension(examplesDir, "clj")

        Assert.assertEquals(
            "There is only 1 file with .clj extension in 'testData/examples' folder",
            1,
            parsed.size
        )
        for (tree in parsed) {
            Assert.assertNotNull("Parse tree for a valid file should not be null", tree)
        }
    }
}
6 changes: 6 additions & 0 deletions testData/examples/1.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
;; Minimal Clojure sample: declares a namespace and a single entry-point
;; function. Kept deliberately tiny; it serves as parser test input.
(ns helloworld.core)

;; Zero-argument function with a docstring; prints a fixed greeting.
(defn -main
"I can say 'Hello World'."
[]
(println "Hello, World!"))

0 comments on commit a058091

Please sign in to comment.