Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge branch 'develop', new consistent version.
  • Loading branch information
rvirding committed Sep 12, 2009
2 parents 8d64824 + 5b716f4 commit 90a1864
Show file tree
Hide file tree
Showing 7 changed files with 74 additions and 45 deletions.
2 changes: 1 addition & 1 deletion COPYRIGHT
@@ -1,4 +1,4 @@
Copyright (c) 2008 Robert Virding. All rights reserved.
Copyright (c) 2008,2009 Robert Virding. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
Expand Down
64 changes: 50 additions & 14 deletions doc/leex.txt
Expand Up @@ -24,29 +24,61 @@ EXPORTS
file(FileName) -> ok | error
file(FileName, Options) -> ok | error

Generate a lexical analyzer from the definition in the input
file. The input file has the extension .xrl. This is added to
the filename if it is not given. The resulting module is the
Xrl filename without the .xrl extension.

The current options are:

dfa_graph
generate a .dot file which contains a description of
the DFA in a format which can be viewed with Graphviz,
www.graphviz.com

{includefile,File}
Use a specific or customised prologue file instead of
default leex/include/leexinc.hrl which is otherwise
included.

{outdir,Dir}
generate the scanner file in directory Dir
{report_errors, bool()}
Causes errors to be printed as they occur. Default is
true.

dfa_graph
generate a .dot file which contains a description of
the DFA in a format which can be viewed with Graphviz,
www.graphviz.com
{report_warnings, bool()}
Causes warnings to be printed as they occur. Default
is true.

{report, bool()}
This is a short form for both report_errors and
report_warnings.

{return_errors, bool()}
If this flag is set, {error, Errors, Warnings} is
returned when there are errors. Default is false.

{return_warnings, bool()}
If this flag is set, an extra field containing
Warnings is added to the tuple returned upon
success. Default is false.

{return, bool()}
This is a short form for both return_errors and
return_warnings.

verbose
Output information from parsing the input file and
{scannerfile, ScannerFile}
ScannerFile is the name of the file that will contain
the Erlang scanner code that is generated. The default
("") is to add the extension .erl to FileName stripped
of the .xrl extension.

{verbose,bool()}
Outputs information from parsing the input file and
generating the internal tables.

Generate a lexical analyzer from the definition in the input
file. The input file has the extension .xrl. This is added to
the filename if it is not given. The resulting module is the
Xrl filename without the .xrl extension.
Any of the Boolean options can be set to true by stating the
name of the option. For example, verbose is equivalent to
{verbose, true}.

GENERATED SCANNER EXPORTS

Expand Down Expand Up @@ -262,6 +294,10 @@ Regular Expressions
delete
\ddd
the octal value ddd
\xhh
the hexadecimal value hh
\x{h...}
the hexadecimal value h...
\c
any other character literally, for example \\ for
backslash, \" for ")
Expand All @@ -276,9 +312,9 @@ Regular Expressions

N.B. Anchoring a regular expression with ^ and $ is not
implemented in the current version of leex and just generates
a nasty error.
a parse error.

AUTHORS
Robert Virding - rvirding@gmail.com

Copyright © 2008 Robert Virding
Copyright © 2008,2009 Robert Virding
4 changes: 2 additions & 2 deletions doc/src/leex.xml
Expand Up @@ -8,7 +8,7 @@
<holder>Ericsson AB. All Rights Reserved.</holder>
</copyright>
<legalnotice>
Copyright (c) 2008 Robert Virding. All rights reserved.
Copyright (c) 2008,2009 Robert Virding. All rights reserved.
</legalnotice>

<title>leex</title>
Expand Down Expand Up @@ -450,6 +450,6 @@ Floats (\\+|-)?[0-9]+\\.[0-9]+((E|e)(\\+|-)?[0-9]+)?</code>

<note><p>Anchoring a regular expression with <c>^</c> and <c>$</c>
is not implemented in the current version of Leex and just
generates an error.</p></note>
generates a parse error.</p></note>
</section>
</erlref>
Binary file modified ebin/leex.beam
Binary file not shown.
5 changes: 5 additions & 0 deletions src/ChangeLog
@@ -1,3 +1,8 @@
2009-07-29 Robert Virding <rv@stanislaw.local>

* leex.erl (parse_rule): Removed parse check of action tokens,
gives false errors (macros) and misses some errors.

2009-07-24 Robert Virding <rv@stanislaw.local>

* leex.erl (re_parse): Pass state through parsing and check for
Expand Down
39 changes: 14 additions & 25 deletions src/leex.erl
@@ -1,4 +1,4 @@
%% Copyright (c) 2008 Robert Virding. All rights reserved.
%% Copyright (c) 2008,2009 Robert Virding. All rights reserved.
%%
%% Redistribution and use in source and binary forms, with or without
%% modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -36,9 +36,9 @@

%%-compile(export_all).

-import(lists, [member/2,reverse/1,sort/1,keysearch/3,keysort/2,keydelete/3,
map/2,foldl/3,foreach/2,flatmap/2,mapfoldl/3,
delete/2]).
-import(lists, [member/2,reverse/1,sort/1,delete/2,
keysearch/3,keysort/2,keydelete/3,keymember/3,
map/2,foldl/3,foreach/2,flatmap/2,mapfoldl/3]).
-import(string, [substr/2,substr/3,span/2,tokens/2,join/2]).
-import(ordsets, [is_element/2,add_element/2,union/2]).
-import(orddict, [store/3]).
Expand Down Expand Up @@ -139,8 +139,6 @@ format_error({regexp,E})->
missing_char -> "missing character"
end,
["bad regexp `",Es,"'"];
format_error({after_regexp,S}) ->
["bad code after regexp ",io_lib:write_string(S)];
format_error(ignored_characters) ->
"ignored characters".

Expand Down Expand Up @@ -488,25 +486,16 @@ parse_rule(S, Line, Atoks, Ms, N, St) ->
case parse_rule_regexp(S, Ms, St) of
{ok,R} ->
%%io:fwrite("RE = ~p~n", [R]),
case erl_parse:parse_exprs(Atoks) of
{ok,_Aes} ->
%% Check for token variables.
TokenChars = var_used('TokenChars', Atoks),
TokenLen = var_used('TokenLen', Atoks),
TokenLine = var_used('TokenLine', Atoks),
{ok,{R,N},{N,Atoks,TokenChars,TokenLen,TokenLine},St};
{error,_} ->
add_error({Line,leex,{after_regexp,S}}, St)
end;
%% Check for token variables.
TokenChars = var_used('TokenChars', Atoks),
TokenLen = var_used('TokenLen', Atoks),
TokenLine = var_used('TokenLine', Atoks),
{ok,{R,N},{N,Atoks,TokenChars,TokenLen,TokenLine},St};
{error,E} ->
add_error({Line,leex,E}, St)
end.

var_used(Name, Toks) ->
case keysearch(Name, 3, Toks) of
{value,{var,_,Name}} -> true;
_ -> false
end.
var_used(Name, Toks) -> keymember(Name, 3, Toks).

%% parse_rule_regexp(RegExpString, Macros, State) -> {ok,RegExp} | {error,Error}.
%% Substitute in macros and parse RegExpString. Cannot use re:replace
Expand All @@ -519,8 +508,7 @@ parse_rule_regexp(RE0, [{M,Exp}|Ms], St) ->
parse_rule_regexp(RE, [], St) ->
%%io:fwrite("RE = ~p~n", [RE]),
case re_parse(RE, St) of
{ok,R,[]} -> {ok,R};
{ok,_,[C|_]} -> {error,{regexp,{illegal_char,[C]}}};
{ok,R} -> {ok,R};
{error,E} -> {error,{regexp,E}}
end.

Expand Down Expand Up @@ -593,11 +581,12 @@ non_white(S) ->
%% The grammar of the current regular expressions. The actual parser
%% is a recursive descent implementation of the grammar.

%% re_parse(Chars, State) -> {ok,RegExp,RestChars} | {error,Error}.
%% re_parse(Chars, State) -> {ok,RegExp} | {error,Error}.

re_parse(Cs0, St) ->
case catch re_reg(Cs0, 0, St) of
{RE,_,Cs1} -> {ok,RE,Cs1};
{RE,_,[]} -> {ok,RE};
{_,_,[C|_]} -> {error,{illegal_char,[C]}};
{parse_error,E} -> {error,E}
end.

Expand Down
5 changes: 2 additions & 3 deletions test/leex_SUITE.erl
Expand Up @@ -173,9 +173,8 @@ syntax(Config) when is_list(Config) ->
<<"Definitions.\n"
"D = [0-9]\n"
"Rules.\n"
"{L}+ : ">>),
?line {error,[{_,[{4,leex,{after_regexp,_}}]}],[]} =
leex:file(Filename, Ret),
"{L}+ : \n">>),
?line {error,[{_,[{5,leex,missing_code}]}],[]} = leex:file(Filename, Ret),
?line ok = file:write_file(Filename,
<<"Definitions.\n"
"D = [0-9]\n"
Expand Down

0 comments on commit 90a1864

Please sign in to comment.