Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Java Backend: Add line number support. #217

Merged
merged 1 commit into from Dec 18, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions source/changelog
@@ -1,4 +1,5 @@
2.9.0 (Development version)
* Java: Add support for passing line numbers to parser.
* C: Emit correct function prototypes
* Java: Add an experimental ANTLR4 backend
* C, C++, OCaml, Java: Accept ' or " as comment delimiters [#146]
Expand Down
27 changes: 15 additions & 12 deletions source/src/BNFC/Backend/Java.hs
Expand Up @@ -124,7 +124,7 @@ makeJava options@Options{..} cf =
Nothing -> (a, b) : remDups as
pkgToDir :: String -> FilePath
pkgToDir s = replace '.' pathSeparator s ++ [pathSeparator]
parselexspec = parserLexerSelector lang javaLexerParser
parselexspec = parserLexerSelector lang javaLexerParser (Options.linenumbers options)
lexfun = cf2lex $ lexer parselexspec
parsefun = cf2parse $ parser parselexspec
parmake = makeparserdetails (parser parselexspec)
Expand Down Expand Up @@ -285,15 +285,18 @@ antlrtest = javaTest [ "org.antlr.v4.runtime","org.antlr.v4.runtime.atn"
showOpts (x:xs) | normCat x /= x = showOpts xs
| otherwise = text (firstLowerCase $ identCat x) : showOpts xs

parserLexerSelector :: String -> JavaLexerParser -> ParserLexerSpecification
parserLexerSelector _ JLexCup = ParseLexSpec
{ lexer = cf2JLex
parserLexerSelector :: String
-> JavaLexerParser
-> Bool -- ^Pass line numbers to the symbols
-> ParserLexerSpecification
parserLexerSelector _ JLexCup ln = ParseLexSpec
{ lexer = cf2JLex ln
, parser = cf2cup
, testclass = cuptest
}
parserLexerSelector _ JFlexCup =
(parserLexerSelector "" JLexCup){lexer = cf2JFlex}
parserLexerSelector l Antlr4 = ParseLexSpec
parserLexerSelector _ JFlexCup ln =
(parserLexerSelector "" JLexCup ln){lexer = cf2JFlex ln}
parserLexerSelector l Antlr4 _ = ParseLexSpec
{ lexer = cf2AntlrLex' l
, parser = cf2AntlrParse' l
, testclass = antlrtest
Expand All @@ -316,15 +319,15 @@ data CFToLexer = CF2Lex
}

-- | Instances of cf-lexergen bridges
cf2JLex, cf2JFlex :: CFToLexer
cf2JLex, cf2JFlex :: Bool -> CFToLexer

cf2JLex = CF2Lex
{ cf2lex = BNFC.Backend.Java.CFtoJLex15.cf2jlex False
-- Lexer bridge for plain JLex. The flag 'ln' is forwarded as the first
-- component of the pair given to cf2jlex; the second component (the
-- JFlex switch) is fixed to False here.
cf2JLex ln = CF2Lex
  { makelexerdetails = jlexmakedetails
  , cf2lex           = generateSpec
  }
  where
    -- Lexer-specification generator with JFlex-specific output disabled.
    generateSpec = BNFC.Backend.Java.CFtoJLex15.cf2jlex (ln, False)

cf2JFlex = CF2Lex
{ cf2lex = BNFC.Backend.Java.CFtoJLex15.cf2jlex True
-- Lexer bridge for JFlex. The flag 'ln' is forwarded as the first
-- component of the pair given to cf2jlex; the second component (the
-- JFlex switch) is fixed to True here.
cf2JFlex ln = CF2Lex
  { makelexerdetails = jflexmakedetails
  , cf2lex           = generateSpec
  }
  where
    -- Lexer-specification generator with JFlex-specific output enabled.
    generateSpec = BNFC.Backend.Java.CFtoJLex15.cf2jlex (ln, True)

Expand Down
79 changes: 54 additions & 25 deletions source/src/BNFC/Backend/Java/CFtoJLex15.hs
Expand Up @@ -46,10 +46,10 @@ import BNFC.Backend.Common.NamedVariables
import Text.PrettyPrint

--The environment must be returned for the parser to use.
cf2jlex :: Bool -> String -> CF -> (Doc, SymEnv)
cf2jlex jflex packageBase cf = (vcat
cf2jlex :: (Bool, Bool) -> String -> CF -> (Doc, SymEnv)
cf2jlex (ln, jflex) packageBase cf = (vcat
[
prelude jflex packageBase,
prelude (ln, jflex) packageBase,
cMacros,
lexSymbols jflex env,
restOfJLex cf
Expand All @@ -60,28 +60,57 @@ cf2jlex jflex packageBase cf = (vcat
makeSymEnv (s:symbs) n = (s, "_SYMB_" ++ show n) : makeSymEnv symbs (n+1)

-- | File prelude
prelude :: Bool -> String -> Doc
prelude jflex packageBase = vcat
prelude :: (Bool, Bool) -> String -> Doc
prelude (ln, jflex) packageBase = vcat
[ "// This JLex file was machine-generated by the BNF converter"
, "package" <+> text packageBase <> ";"
, ""
, "import java_cup.runtime.*;"
, "%%"
, "%cup"
, "%unicode"
, "%line"
, (if ln
then vcat
[ "%line"
, (if jflex then "%column" else "")
, "%char" ]
else "")
, "%public"
, "%{"
, nest 2 $ vcat
[ "String pstring = new String();"
, "final int unknown = -1;"
, "ComplexSymbolFactory.Location left = new ComplexSymbolFactory.Location(unknown, unknown);"
, "ComplexSymbolFactory cf = new ComplexSymbolFactory();"
, positionDeclarations
, "public int line_num() { return (yyline+1); }"
, "public ComplexSymbolFactory.Location left_loc() {"
, " return new ComplexSymbolFactory.Location(yyline+1, yycolumn+1, yychar);"
, "}"
, "public ComplexSymbolFactory.Location right_loc() {"
, " ComplexSymbolFactory.Location right = left_loc();"
, (if ln
then "right.move(0, yylength(), yylength());"
else "")
, " return right;"
, "}"
, "public String buff()" <+> braces
(if jflex
then "return new String(zzBuffer,zzCurrentPos,10).trim();"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Magic number 10.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Magic number 10 was there before.

else "return new String(yy_buffer,yy_buffer_index,10).trim();")
]
, "%}"
]
where
-- Java field declarations for the position counters read by the generated
-- left_loc() helper, which builds
--   new ComplexSymbolFactory.Location(yyline+1, yycolumn+1, yychar).
-- Only counters that the lexer generator does not supply are declared here.
positionDeclarations
  -- JFlex always defines yyline, yychar, yycolumn, even if unused.
  | jflex = ""
  -- JLex with line numbers: the prelude emits %line and %char but not
  -- %column in this mode, so only yycolumn is declared by hand
  -- (presumably JLex then provides yyline and yychar itself -- confirm
  -- against the JLex manual).
  | ln = "int yycolumn = unknown - 1;"
  -- JLex without line numbers: declare all three counters so that every
  -- component of the resulting Location equals 'unknown'.
  | otherwise = vcat
      -- yyline and yycolumn are incremented by one in left_loc(), so
      -- subtract one so that one based numbering still ends up with
      -- unknown. yychar is passed through unchanged and therefore starts
      -- at unknown itself (previously yychar and yycolumn had their
      -- initial values swapped, giving column 0 and offset -2).
      [ "int yyline = unknown - 1;"
      , "int yychar = unknown;"
      , "int yycolumn = unknown - 1;" ]

--For now all categories are included.
--Optimally only the ones that are used should be generated.
Expand All @@ -103,21 +132,21 @@ cMacros = vcat [

-- |
-- >>> lexSymbols False [("foo","bar")]
-- <YYINITIAL>foo { return new Symbol(sym.bar); }
-- <YYINITIAL>foo { return cf.newSymbol("", sym.bar, left_loc(), right_loc()); }
-- >>> lexSymbols False [("\\","bar")]
-- <YYINITIAL>\\ { return new Symbol(sym.bar); }
-- <YYINITIAL>\\ { return cf.newSymbol("", sym.bar, left_loc(), right_loc()); }
-- >>> lexSymbols False [("/","bar")]
-- <YYINITIAL>/ { return new Symbol(sym.bar); }
-- <YYINITIAL>/ { return cf.newSymbol("", sym.bar, left_loc(), right_loc()); }
-- >>> lexSymbols True [("/","bar")]
-- <YYINITIAL>\/ { return new Symbol(sym.bar); }
-- <YYINITIAL>\/ { return cf.newSymbol("", sym.bar, left_loc(), right_loc()); }
-- >>> lexSymbols True [("~","bar")]
-- <YYINITIAL>\~ { return new Symbol(sym.bar); }
-- <YYINITIAL>\~ { return cf.newSymbol("", sym.bar, left_loc(), right_loc()); }
lexSymbols :: Bool -> SymEnv -> Doc
lexSymbols jflex ss = vcat $ map transSym ss
where
transSym (s,r) =
"<YYINITIAL>" <> text (escapeChars s) <> " { return new Symbol(sym."
<> text r <> "); }"
"<YYINITIAL>" <> text (escapeChars s) <> " { return cf.newSymbol(\"\", sym."
<> text r <> ", left_loc(), right_loc()); }"
--Helper function that escapes characters in strings
escapeChars :: String -> String
escapeChars = concatMap (escapeChar jflex)
Expand All @@ -130,24 +159,24 @@ restOfJLex cf = vcat
, ifC catString strStates
, ifC catChar chStates
, ifC catDouble
"<YYINITIAL>{DIGIT}+\".\"{DIGIT}+(\"e\"(\\-)?{DIGIT}+)? { return new Symbol(sym._DOUBLE_, new Double(yytext())); }"
"<YYINITIAL>{DIGIT}+\".\"{DIGIT}+(\"e\"(\\-)?{DIGIT}+)? { return cf.newSymbol(\"\", sym._DOUBLE_, left_loc(), right_loc(), new Double(yytext())); }"
, ifC catInteger
"<YYINITIAL>{DIGIT}+ { return new Symbol(sym._INTEGER_, new Integer(yytext())); }"
"<YYINITIAL>{DIGIT}+ { return cf.newSymbol(\"\", sym._INTEGER_, left_loc(), right_loc(), new Integer(yytext())); }"
, ifC catIdent
"<YYINITIAL>{LETTER}{IDENT}* { return new Symbol(sym._IDENT_, yytext().intern()); }"
"<YYINITIAL>{LETTER}{IDENT}* { return cf.newSymbol(\"\", sym._IDENT_, left_loc(), right_loc(), yytext().intern()); }"
, "<YYINITIAL>[ \\t\\r\\n\\f] { /* ignore white space. */ }"
]
where
ifC cat s = if isUsedCat cf cat then s else ""
userDefTokens = vcat
[ "<YYINITIAL>" <> text (printRegJLex exp)
<+> "{ return new Symbol(sym." <> text (show name)
<> ", yytext().intern()); }"
<+> "{ return cf.newSymbol(\"\", sym." <> text (show name)
<> ", left_loc(), right_loc(), yytext().intern()); }"
| (name, exp) <- tokenPragmas cf ]
strStates = vcat --These handle escaped characters in Strings.
[ "<YYINITIAL>\"\\\"\" { yybegin(STRING); }"
[ "<YYINITIAL>\"\\\"\" { left = left_loc(); yybegin(STRING); }"
, "<STRING>\\\\ { yybegin(ESCAPED); }"
, "<STRING>\\\" { String foo = pstring; pstring = new String(); yybegin(YYINITIAL); return new Symbol(sym._STRING_, foo.intern()); }"
, "<STRING>\\\" { String foo = pstring; pstring = new String(); yybegin(YYINITIAL); return cf.newSymbol(\"\", sym._STRING_, left, right_loc(), foo.intern()); }"
, "<STRING>. { pstring += yytext(); }"
, "<ESCAPED>n { pstring += \"\\n\"; yybegin(STRING); }"
, "<ESCAPED>\\\" { pstring += \"\\\"\"; yybegin(STRING); }"
Expand All @@ -156,12 +185,12 @@ restOfJLex cf = vcat
, "<ESCAPED>. { pstring += yytext(); yybegin(STRING); }"
]
chStates = vcat --These handle escaped characters in Chars.
[ "<YYINITIAL>\"'\" { yybegin(CHAR); }"
[ "<YYINITIAL>\"'\" { left = left_loc(); yybegin(CHAR); }"
, "<CHAR>\\\\ { yybegin(CHARESC); }"
, "<CHAR>[^'] { yybegin(CHAREND); return new Symbol(sym._CHAR_, new Character(yytext().charAt(0))); }"
, "<CHARESC>n { yybegin(CHAREND); return new Symbol(sym._CHAR_, new Character('\\n')); }"
, "<CHARESC>t { yybegin(CHAREND); return new Symbol(sym._CHAR_, new Character('\\t')); }"
, "<CHARESC>. { yybegin(CHAREND); return new Symbol(sym._CHAR_, new Character(yytext().charAt(0))); }"
, "<CHAR>[^'] { yybegin(CHAREND); return cf.newSymbol(\"\", sym._CHAR_, left, right_loc(), new Character(yytext().charAt(0))); }"
, "<CHARESC>n { yybegin(CHAREND); return cf.newSymbol(\"\", sym._CHAR_, left, right_loc(), new Character('\\n')); }"
, "<CHARESC>t { yybegin(CHAREND); return cf.newSymbol(\"\", sym._CHAR_, left, right_loc(), new Character('\\t')); }"
, "<CHARESC>. { yybegin(CHAREND); return cf.newSymbol(\"\", sym._CHAR_, left, right_loc(), new Character(yytext().charAt(0))); }"
, "<CHAREND>\"'\" {yybegin(YYINITIAL);}"
]

Expand Down
4 changes: 4 additions & 0 deletions testing/src/ParameterizedTests.hs
Expand Up @@ -166,10 +166,14 @@ parameters =
-- Java
, javaParams { tpName = "Java"
, tpBnfcOptions = ["--java", "-m"] }
, javaParams { tpName = "Java (with line numbers)"
, tpBnfcOptions = ["--java", "-m", "-l"] }
, javaParams { tpName = "Java (with namespace)"
, tpBnfcOptions = ["--java", "-p", "my.stuff", "-m"] }
, javaParams { tpName = "Java (with jflex)"
, tpBnfcOptions = ["--java", "--jflex", "-m"] }
, javaParams { tpName = "Java (with jflex and line numbers)"
, tpBnfcOptions = ["--java", "--jflex", "-m", "-l"] }
, javaParams { tpName = "Java (with antlr)"
, tpBnfcOptions = ["--java", "--antlr", "-m"] }
]
Expand Down