Skip to content

Commit

Permalink
Merge pull request #989 from CakeML/pan_errors
Browse files Browse the repository at this point in the history
Rudimentary error reporting for the Pancake parser
  • Loading branch information
IlmariReissumies committed Apr 11, 2024
2 parents 77aba46 + 9fa5ba7 commit 9c063a7
Show file tree
Hide file tree
Showing 8 changed files with 193 additions and 56 deletions.
2 changes: 2 additions & 0 deletions compiler/bootstrap/translation/from_pancake32ProgScript.sml
Expand Up @@ -678,6 +678,8 @@ val res = translate $ spec32 conv_Fun_def;

val res = translate $ spec32 conv_FunList_def;

val res = translate $ spec32 panLexerTheory.dest_lexErrorT_def;

val res = translate $ spec32 parse_funs_to_ast_def;

val res = translate $ spec32 parse_to_ast_def;
Expand Down
2 changes: 2 additions & 0 deletions compiler/bootstrap/translation/from_pancake64ProgScript.sml
Expand Up @@ -682,6 +682,8 @@ val res = translate $ spec64 conv_Fun_def;

val res = translate $ spec64 conv_FunList_def;

val res = translate $ spec64 panLexerTheory.dest_lexErrorT_def;

val res = translate $ spec64 parse_funs_to_ast_def;

val res = translate $ spec64 parse_to_ast_def;
Expand Down
34 changes: 26 additions & 8 deletions compiler/bootstrap/translation/pancake_parseProgScript.sml
Expand Up @@ -111,18 +111,36 @@ Theorem parse_side_lemma = Q.prove(`
assume_tac PEG_wellformed \\
drule_then strip_assume_tac pegexecTheory.peg_exec_total \\
first_x_assum $ qspec_then `x` strip_assume_tac \\
FULL_SIMP_TAC std_ss [pegexecTheory.peg_exec_def,
pegexecTheory.coreloop_def,
CaseEq "option",
pegexecTheory.evalcase_distinct,
SIMP_CONV (srw_ss()) [pancake_peg_def] ``pancake_peg.start``,
IS_SOME_EXISTS] \\
qexists_tac `Result r`\\
gvs [pegexecTheory.peg_exec_def,
pegexecTheory.coreloop_def,
AllCaseEqs(),
pegexecTheory.evalcase_distinct,
SIMP_CONV (srw_ss()) [pancake_peg_def] ``pancake_peg.start``,
IS_SOME_EXISTS]
>- (rename1 ‘_ = SOME (Result rr)’ \\
qexists_tac `Result rr`\\
pop_assum (REWRITE_TAC o single o GSYM) \\
rpt (AP_THM_TAC ORELSE AP_TERM_TAC) \\
rw[FUN_EQ_THM] \\
rpt(PURE_FULL_CASE_TAC >> gvs[FDOM_FLOOKUP]) \\
gvs [flookup_thm]) \\
assume_tac PEG_FunNT_wellformed \\
drule_then strip_assume_tac pegexecTheory.peg_exec_total \\
first_x_assum $ qspec_then `x` strip_assume_tac \\
gvs [pegexecTheory.peg_exec_def,
pegexecTheory.coreloop_def,
AllCaseEqs(),
pegexecTheory.evalcase_distinct,
SIMP_CONV (srw_ss()) [pancake_peg_def] ``pancake_peg.start``,
IS_SOME_EXISTS] \\
rename1 ‘_ = SOME (Result rr)’ \\
qexists_tac `Result rr`\\
pop_assum (REWRITE_TAC o single o GSYM) \\
rpt (AP_THM_TAC ORELSE AP_TERM_TAC) \\
rw[FUN_EQ_THM] \\
rpt(PURE_FULL_CASE_TAC >> gvs[FDOM_FLOOKUP]) \\
gvs [flookup_thm])
gvs [flookup_thm]
)
|> update_precondition;

val _ = ml_translatorLib.ml_prog_update (ml_progLib.close_module NONE);
Expand Down
8 changes: 6 additions & 2 deletions compiler/compilerScript.sml
Expand Up @@ -276,8 +276,12 @@ Definition compile_pancake_def:
compile_pancake c input =
let _ = empty_ffi (strlit "finished: start up") in
case panPtreeConversion$parse_funs_to_ast input of
| NONE => Failure (ParseError (strlit "Failed pancake parsing"))
| SOME funs =>
| INR errs =>
Failure $ ParseError $ concat $
MAP (λ(msg,loc). concat [msg; strlit " at ";
locs_to_string (implode input) (SOME loc); strlit "\n"])
errs
| INL funs =>
case scope_check funs of
| SOME (x, fname) => (Failure (ScopeError x fname))
| NONE =>
Expand Down
2 changes: 1 addition & 1 deletion pancake/parser/panConcreteExamplesScript.sml
Expand Up @@ -41,7 +41,7 @@ fun parse_pancake q =
EVAL “parse_funs_to_ast ^code”
end

val check_success = assert $ optionSyntax.is_some o rhs o concl
val check_success = assert $ sumSyntax.is_inl o rhs o concl

(** Examples can be written using quoted strings and passed to the ML
function parse_pancake. *)
Expand Down
43 changes: 31 additions & 12 deletions pancake/parser/panLexerScript.sml
Expand Up @@ -13,6 +13,7 @@ open HolKernel Parse boolLib bossLib stringLib numLib;

open arithmeticTheory stringTheory intLib listTheory locationTheory;
open ASCIInumbersTheory ASCIInumbersLib;
open mlstringTheory;

val _ = new_theory "panLexer";

Expand All @@ -32,11 +33,11 @@ Datatype:
| LBrakT | RBrakT | LCurT | RCurT
| AssignT
| KeywordT keyword
| LexErrorT
| LexErrorT mlstring
End

Datatype:
atom = NumberA int | WordA string | SymA string | ErrA
atom = NumberA int | WordA string | SymA string | ErrA mlstring
End

Definition isAtom_singleton_def:
Expand All @@ -58,6 +59,16 @@ Definition isAlphaNumOrWild_def:
isAlphaNumOrWild c ⇔ isAlphaNum c ∨ c = #"_"
End

Definition isLexErrorT_def:
(isLexErrorT (LexErrorT _) ⇔ T) ∧
(isLexErrorT _ ⇔ F)
End

Definition dest_lexErrorT_def:
(destLexErrorT (LexErrorT msg) = SOME msg) ∧
destLexErrorT _ = NONE
End

Definition get_token_def:
get_token s =
if s = "&" then AndT else
Expand Down Expand Up @@ -89,7 +100,7 @@ Definition get_token_def:
if s = "{" then LCurT else
if s = "}" then RCurT else
if s = "=" then AssignT else
LexErrorT
LexErrorT $ concat [«Unrecognised symbolic token: »; implode s]
End

Definition get_keyword_def:
Expand All @@ -116,9 +127,10 @@ Definition get_keyword_def:
if s = "true" then (KeywordT TrueK) else
if s = "false" then (KeywordT FalseK) else
if s = "fun" then (KeywordT FunK) else
if s = "" ∨ s = "@" then LexErrorT else
if 2 <= LENGTH s ∧ EL 0 s = #"@" then ForeignIdent (DROP 1 s) else
IdentT s
if s = "" then LexErrorT $ «Expected keyword, found empty string» else
if 2 <= LENGTH s ∧ EL 0 s = #"@" then ForeignIdent (DROP 1 s)
else
IdentT s
End

Definition token_of_atom_def:
Expand All @@ -127,7 +139,7 @@ Definition token_of_atom_def:
| NumberA i => IntT i
| WordA s => get_keyword s
| SymA s => get_token s
| ErrA => LexErrorT
| ErrA s => LexErrorT s
End

Definition read_while_def:
Expand Down Expand Up @@ -174,9 +186,8 @@ Proof
Induct
>> fs[skip_comment_def]
>> rw[]
>> sg ‘STRLEN str < STRLEN xs’
>- metis_tac[]
>> fs[LE]
>> res_tac
>> gvs[]
QED

Definition unhex_alt_def:
Expand Down Expand Up @@ -206,7 +217,7 @@ Definition next_atom_def:
rest)
else if isPREFIX "//" (c::cs) then (* comment *)
(case (skip_comment (TL cs) (next_loc 2 loc)) of
| NONE => SOME (ErrA, Locs loc (next_loc 2 loc), "")
| NONE => SOME (ErrA «Malformed comment», Locs loc (next_loc 2 loc), "")
| SOME (rest, loc') => next_atom rest loc')
else if isAtom_singleton c then
SOME (SymA (STRING c []), Locs loc loc, cs)
Expand All @@ -217,7 +228,7 @@ Definition next_atom_def:
let (n, rest) = read_while isAlphaNumOrWild cs [c] in
SOME (WordA n, Locs loc (next_loc (LENGTH n) loc), rest)
else (* input not recognised *)
SOME (ErrA, Locs loc loc, cs)
SOME (ErrA $ concat [«Unrecognised symbol: »; str c], Locs loc loc, cs)
Termination
WF_REL_TAC ‘measure (LENGTH o FST)’
>> REPEAT STRIP_TAC
Expand Down Expand Up @@ -281,6 +292,14 @@ Definition pancake_lex_def:
pancake_lex input = pancake_lex_aux input init_loc
End

Definition safe_pancake_lex_def:
safe_pancake_lex input =
(let output = pancake_lex input in
case FILTER (isLexErrorT ∘ FST) output of
[] => INL output
| xs => INR $ MAP ((the «» ∘ destLexErrorT) ## I) xs)
End

(* Tests :
EVAL “pancake_ex "x + 1 --Then check y\n && y - 2 <= -3 || !z”;
*)
Expand Down

0 comments on commit 9c063a7

Please sign in to comment.