diff --git a/COPYRIGHT b/COPYRIGHT index f9f5cd6..c6fb875 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -1,4 +1,4 @@ -Copyright (c) 2008 Robert Virding. All rights reserved. +Copyright (c) 2008,2009 Robert Virding. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions diff --git a/doc/leex.txt b/doc/leex.txt index b4b581c..f7c6d3b 100644 --- a/doc/leex.txt +++ b/doc/leex.txt @@ -24,29 +24,61 @@ EXPORTS file(FileName) -> ok | error file(FileName, Options) -> ok | error + Generate a lexical analyzer from the definition in the input + file. The input file has the extension .xrl. This is added to + the filename if it is not given. The resulting module is the + Xrl filename without the .xrl extension. + The current options are: + dfa_graph + generate a .dot file which contains a description of + the DFA in a format which can be viewed with Graphviz, + www.graphviz.org + {includefile,File} Use a specific or customised prologue file instead of default leex/include/leexinc.hrl which is otherwise included. - {outdir,Dir} - generate the scanner file in directory Dir + {report_errors, bool()} + Causes errors to be printed as they occur. Default is + true. - dfa_graph - generate a .dot file which contains a desciption of - the DFA in a format which can be viewd with Graphviz, - www.graphviz.com + {report_warnings, bool()} + Causes warnings to be printed as they occur. Default + is true. + + {report, bool()} + This is a short form for both report_errors and + report_warnings. + + {return_errors, bool()} + If this flag is set, {error, Errors, Warnings} is + returned when there are errors. Default is false. + + {return_warnings, bool()} + If this flag is set, an extra field containing + Warnings is added to the tuple returned upon + success. Default is false. + + {return, bool()} + This is a short form for both return_errors and + return_warnings. 
- verbose - Output information from parsing the input file and + {scannerfile, ScannerFile} + ScannerFile is the name of the file that will contain + the Erlang scanner code that is generated. The default + ("") is to add the extension .erl to FileName stripped + of the .xrl extension. + + {verbose,bool()} + Outputs information from parsing the input file and generating the internal tables. - Generate a lexical analyzer from the definition in the input - file. The input file has the extension .xrl. This is added to - the filename if it is not given. The resulting module is the - Xrl filename without the .xrl extension. + Any of the Boolean options can be set to true by stating the + name of the option. For example, verbose is equivalent to + {verbose, true}. GENERATED SCANNER EXPORTS @@ -262,6 +294,10 @@ Regular Expressions delete \ddd the octal value ddd + \xhh + the hexadecimal value hh + \x{h...} + the hexadecimal value h... \c any other character literally, for example \\ for backslash, \" for ") @@ -276,9 +312,9 @@ Regular Expressions N.B. Anchoring a regular expression with ^ and $ is not implemented in the current version of leex and just generates - a nasty error. + a parse error. AUTHORS Robert Virding - rvirding@gmail.com - Copyright © 2008 Robert Virding + Copyright © 2008,2009 Robert Virding diff --git a/doc/src/leex.xml b/doc/src/leex.xml index 5a606c2..1101278 100644 --- a/doc/src/leex.xml +++ b/doc/src/leex.xml @@ -8,7 +8,7 @@ Ericsson AB. All Rights Reserved. - Copyright (c) 2008 Robert Virding. All rights reserved. + Copyright (c) 2008,2009 Robert Virding. All rights reserved. leex @@ -450,6 +450,6 @@ Floats (\\+|-)?[0-9]+\\.[0-9]+((E|e)(\\+|-)?[0-9]+)?

Anchoring a regular expression with ^ and $ is not implemented in the current version of Leex and just - generates an error.

+ generates a parse error.

diff --git a/ebin/leex.beam b/ebin/leex.beam index 3cbb757..c6cd840 100644 Binary files a/ebin/leex.beam and b/ebin/leex.beam differ diff --git a/src/ChangeLog b/src/ChangeLog index 7077505..2f62afc 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,8 @@ +2009-07-29 Robert Virding + + * leex.erl (parse_rule): Removed parse check of action tokens, + gives false errors (macros) and misses some errors. + 2009-07-24 Robert Virding * leex.erl (re_parse): Pass state through parsing and check for diff --git a/src/leex.erl b/src/leex.erl index 216630c..16ebdd6 100644 --- a/src/leex.erl +++ b/src/leex.erl @@ -1,4 +1,4 @@ -%% Copyright (c) 2008 Robert Virding. All rights reserved. +%% Copyright (c) 2008,2009 Robert Virding. All rights reserved. %% %% Redistribution and use in source and binary forms, with or without %% modification, are permitted provided that the following conditions @@ -36,9 +36,9 @@ %%-compile(export_all). --import(lists, [member/2,reverse/1,sort/1,keysearch/3,keysort/2,keydelete/3, - map/2,foldl/3,foreach/2,flatmap/2,mapfoldl/3, - delete/2]). +-import(lists, [member/2,reverse/1,sort/1,delete/2, + keysearch/3,keysort/2,keydelete/3,keymember/3, + map/2,foldl/3,foreach/2,flatmap/2,mapfoldl/3]). -import(string, [substr/2,substr/3,span/2,tokens/2,join/2]). -import(ordsets, [is_element/2,add_element/2,union/2]). -import(orddict, [store/3]). @@ -139,8 +139,6 @@ format_error({regexp,E})-> missing_char -> "missing character" end, ["bad regexp `",Es,"'"]; -format_error({after_regexp,S}) -> - ["bad code after regexp ",io_lib:write_string(S)]; format_error(ignored_characters) -> "ignored characters". @@ -488,25 +486,16 @@ parse_rule(S, Line, Atoks, Ms, N, St) -> case parse_rule_regexp(S, Ms, St) of {ok,R} -> %%io:fwrite("RE = ~p~n", [R]), - case erl_parse:parse_exprs(Atoks) of - {ok,_Aes} -> - %% Check for token variables. 
- TokenChars = var_used('TokenChars', Atoks), - TokenLen = var_used('TokenLen', Atoks), - TokenLine = var_used('TokenLine', Atoks), - {ok,{R,N},{N,Atoks,TokenChars,TokenLen,TokenLine},St}; - {error,_} -> - add_error({Line,leex,{after_regexp,S}}, St) - end; + %% Check for token variables. + TokenChars = var_used('TokenChars', Atoks), + TokenLen = var_used('TokenLen', Atoks), + TokenLine = var_used('TokenLine', Atoks), + {ok,{R,N},{N,Atoks,TokenChars,TokenLen,TokenLine},St}; {error,E} -> add_error({Line,leex,E}, St) end. -var_used(Name, Toks) -> - case keysearch(Name, 3, Toks) of - {value,{var,_,Name}} -> true; - _ -> false - end. +var_used(Name, Toks) -> keymember(Name, 3, Toks). %% parse_rule_regexp(RegExpString, Macros, State) -> {ok,RegExp} | {error,Error}. %% Substitute in macros and parse RegExpString. Cannot use re:replace @@ -519,8 +508,7 @@ parse_rule_regexp(RE0, [{M,Exp}|Ms], St) -> parse_rule_regexp(RE, [], St) -> %%io:fwrite("RE = ~p~n", [RE]), case re_parse(RE, St) of - {ok,R,[]} -> {ok,R}; - {ok,_,[C|_]} -> {error,{regexp,{illegal_char,[C]}}}; + {ok,R} -> {ok,R}; {error,E} -> {error,{regexp,E}} end. @@ -593,11 +581,12 @@ non_white(S) -> %% The grammar of the current regular expressions. The actual parser %% is a recursive descent implementation of the grammar. -%% re_parse(Chars, State) -> {ok,RegExp,RestChars} | {error,Error}. +%% re_parse(Chars, State) -> {ok,RegExp} | {error,Error}. re_parse(Cs0, St) -> case catch re_reg(Cs0, 0, St) of - {RE,_,Cs1} -> {ok,RE,Cs1}; + {RE,_,[]} -> {ok,RE}; + {_,_,[C|_]} -> {error,{illegal_char,[C]}}; {parse_error,E} -> {error,E} end. 
diff --git a/test/leex_SUITE.erl b/test/leex_SUITE.erl index e0a1568..c3cc318 100644 --- a/test/leex_SUITE.erl +++ b/test/leex_SUITE.erl @@ -173,9 +173,8 @@ syntax(Config) when is_list(Config) -> <<"Definitions.\n" "D = [0-9]\n" "Rules.\n" - "{L}+ : ">>), - ?line {error,[{_,[{4,leex,{after_regexp,_}}]}],[]} = - leex:file(Filename, Ret), + "{L}+ : \n">>), + ?line {error,[{_,[{5,leex,missing_code}]}],[]} = leex:file(Filename, Ret), ?line ok = file:write_file(Filename, <<"Definitions.\n" "D = [0-9]\n"