Skip to content

Commit

Permalink
Merge pull request #968 from CakeML/candle-parser-feature-adds
Browse files Browse the repository at this point in the history
Candle parser fixes and additions
  • Loading branch information
myreen committed Aug 22, 2023
2 parents 5696cf2 + edd0b3b commit a50a290
Show file tree
Hide file tree
Showing 5 changed files with 276 additions and 63 deletions.
16 changes: 14 additions & 2 deletions compiler/bootstrap/translation/caml_parserProgScript.sml
Expand Up @@ -140,6 +140,18 @@ val _ = update_precondition precparse_side;

val r = preprocess ptree_PPattern_def |> translate;

Theorem ptree_PPattern_side[local]:
(∀x. camlptreeconversion_ptree_ppattern_side x) ∧
(∀x. camlptreeconversion_ptree_ppatternlist_side x) ∧
(∀x y. camlptreeconversion_grabpairs_side x y)
Proof
ho_match_mp_tac ptree_PPattern_ind \\ rw []
\\ simp [Once (fetch "-" "camlptreeconversion_ptree_ppattern_side_def")]
\\ rw [] \\ gs [caml_lexTheory.isInt_thm, SF SFY_ss]
QED

val _ = update_precondition ptree_PPattern_side;

val r = preprocess ptree_Pattern_def |> translate;

(* This takes a long time.
Expand All @@ -158,7 +170,8 @@ Theorem ptree_Expr_preconds[local]:
(∀x. camlptreeconversion_ptree_exprlist_side x) ∧
(∀x. camlptreeconversion_ptree_exprcommas_side x) ∧
(∀x. camlptreeconversion_ptree_update_side x) ∧
(∀x. camlptreeconversion_ptree_updates_side x)
(∀x. camlptreeconversion_ptree_updates_side x) ∧
(∀x. camlptreeconversion_ptree_index_side x)
Proof
ho_match_mp_tac ptree_Expr_ind
\\ strip_tac
Expand All @@ -182,7 +195,6 @@ QED

val _ = List.app (ignore o update_precondition) (CONJUNCTS ptree_Expr_preconds);


val r = translate partition_types_def;

Theorem camlptreeconversion_partition_types_side[local]:
Expand Down
83 changes: 51 additions & 32 deletions compiler/parsing/ocaml/camlPEGScript.sml
Expand Up @@ -232,13 +232,15 @@ Datatype:
(* expressions *)
| nLiteral | nIdent | nEBase | nEList
| nEApp | nEConstr | nEFunapp | nEAssert | nELazy
| nEPrefix | nENeg | nEShift | nEMult
| nEPrefix | nEIndex | nENeg | nEShift | nEMult
| nERecProj | nERecUpdate | nERecCons
| nEAdd | nECons | nECat | nERel
| nEAnd | nEOr | nEProd | nEAssign | nEIf | nESeq
| nEMatch | nETry | nEFun | nEFunction | nELet | nELetRec
| nEWhile | nEFor | nExpr
| nEUnclosed (* expressions that bind everything to the right *)
(* indexing *)
| nArrIdx | nStrIdx
(* record updates *)
| nUpdate | nUpdates | nFieldDec | nFieldDecs
(* pattern matches *)
Expand All @@ -264,7 +266,7 @@ Datatype:
| nSemis | nExprItem | nExprItems | nDefItem
(* misc *)
| nShiftOp | nMultOp | nAddOp | nRelOp | nAndOp | nOrOp | nCatOp | nPrefixOp
| nAssignOp | nStart
| nAssignOp | nPatLiteral | nStart
(* Declarations through CakeML pragmas *)
| nCakeMLPragma
End
Expand Down Expand Up @@ -576,9 +578,19 @@ Definition camlPEG_def[nocompute]:
(bindNT nPrefixOp));
(INL nEPrefix,
seql [try (pnt nPrefixOp); pnt nEBase] (bindNT nEPrefix));
(* -- Expr14.6 ------------------------------------------------------- *)
(INL nArrIdx,
seql [tokeq DotParenT; pnt nExpr; tokeq RparT]
(bindNT nArrIdx));
(INL nStrIdx,
seql [tokeq DotBrackT; pnt nExpr; tokeq RbrackT]
(bindNT nStrIdx));
(INL nEIndex,
seql [pnt nEPrefix; try (choicel [pnt nStrIdx; pnt nArrIdx])]
(bindNT nEIndex));
(* -- Expr14.5 ------------------------------------------------------- *)
(INL nERecProj,
seql [pnt nEPrefix;
seql [pnt nEIndex;
try (seql [tokeq DotT; pnt nFieldName] I)]
(bindNT nERecProj));
(* -- Expr14 --------------------------------------------------------- *)
Expand Down Expand Up @@ -737,7 +749,8 @@ Definition camlPEG_def[nocompute]:
(bindNT nPatternMatches));
(* -- Let bindings --------------------------------------------------- *)
(INL nLetRecBinding,
seql [pnt nValueName; pnt nPatterns;
seql [pnt nValueName;
try (pnt nPatterns);
try (seql [tokeq ColonT; pnt nType] I);
tokeq EqualT; pnt nExpr]
(bindNT nLetRecBinding));
Expand All @@ -746,7 +759,7 @@ Definition camlPEG_def[nocompute]:
(bindNT nLetRecBindings));
(INL nLetBinding,
pegf (choicel [seql [pnt nPattern; tokeq EqualT; pnt nExpr] I;
seql [pnt nValueName; pnt nPatterns;
seql [pnt nValueName; try (pnt nPatterns);
try (seql [tokeq ColonT; pnt nType] I);
tokeq EqualT; pnt nExpr] I])
(bindNT nLetBinding));
Expand All @@ -767,8 +780,12 @@ Definition camlPEG_def[nocompute]:
try (seql [tokeq ColonT; pnt nType] I)] I);
tokeq RparT]
(bindNT nPPar));
(INL nPatLiteral,
choicel [pegf (pnt nLiteral) (bindNT nPatLiteral);
seql [tokeq MinusT; tok isInt mktokLf]
(bindNT nPatLiteral)]);
(INL nPBase, (* ::= any / var / lit / list / '(' p ')' *)
pegf (choicel [pnt nLiteral; pnt nValueName; pnt nPAny; pnt nPList;
pegf (choicel [pnt nPatLiteral; pnt nValueName; pnt nPAny; pnt nPList;
pnt nPPar])
(bindNT nPBase));
(* -- Pat2 ----------------------------------------------------------- *)
Expand Down Expand Up @@ -997,15 +1014,16 @@ val npeg0_rwts =
“nOperatorName”, “nConstrName”, “nTypeConstrName”, “nModuleName”,
“nValuePath”, “nConstr”, “nTypeConstr”, “nModulePath”, “nFieldName”,
“nUpdate”, “nUpdates”, “nERecUpdate”, “nERecCons”, “nLiteral”,
“nIdent”, “nEList”, “nEConstr”, “nEBase”, “nEPrefix”, “nERecProj”,
“nELazy”, “nEAssert”, “nEFunapp”, “nEApp”, “nLetBinding”, “nPAny”,
“nPList”, “nPPar”, “nPBase”, “nPCons”, “nPAs”, “nPOps”, “nPattern”,
“nPatterns”, “nLetBindings”, “nLetRecBinding”, “nLetRecBindings”,
“nPatternMatches”, “nPatternMatch”, “nEMatch”, “nETry”, “nEFun”,
“nEFunction”, “nELet”, “nELetRec”, “nEWhile”, “nEFor”, “nEUnclosed”,
“nENeg”, “nEShift”, “nEMult”, “nEAdd”, “nECons”, “nECat”, “nERel”,
“nEAnd”, “nEOr”, “nEHolInfix”, “nEProd”, “nEAssign”, “nEIf”, “nESeq”,
“nExpr”, “nTypeDefinition”, “nTypeDef”, “nTypeDefs”, “nTVar”, “nTBase”,
“nIdent”, “nEList”, “nEConstr”, “nEBase”, “nEPrefix”, “nArrIdx”,
“nStrIdx”, “nEIndex”, “nERecProj”, “nELazy”, “nEAssert”, “nEFunapp”,
“nEApp”, “nLetBinding”, “nPAny”, “nPList”, “nPPar”, “nPatLiteral”,
“nPBase”, “nPCons”, “nPAs”, “nPOps”, “nPattern”, “nPatterns”,
“nLetBindings”, “nLetRecBinding”, “nLetRecBindings”, “nPatternMatches”,
“nPatternMatch”, “nEMatch”, “nETry”, “nEFun”, “nEFunction”, “nELet”,
“nELetRec”, “nEWhile”, “nEFor”, “nEUnclosed”, “nENeg”, “nEShift”,
“nEMult”, “nEAdd”, “nECons”, “nECat”, “nERel”, “nEAnd”, “nEOr”,
“nEHolInfix”, “nEProd”, “nEAssign”, “nEIf”, “nESeq”, “nExpr”,
“nTypeDefinition”, “nTypeDef”, “nTypeDefs”, “nTVar”, “nTBase”,
“nTConstr”, “nTProd”, “nTFun”, “nType”, “nTypeList”, “nTypeLists”,
“nTypeParams”, “nConstrDecl”, “nTypeReprs”, “nTypeRepr”, “nTypeInfo”,
“nConstrArgs”, “nExcDefinition”, “nTopLet”, “nTopLetRec”, “nOpen”,
Expand Down Expand Up @@ -1035,23 +1053,24 @@ val topo_nts =
“nHolInfixOp”, “nCatOp”, “nPrefixOp”, “nAssignOp”, “nValueName”,
“nOperatorName”, “nConstrName”, “nTypeConstrName”, “nModuleName”,
“nModulePath”, “nValuePath”, “nConstr”, “nTypeConstr”, “nFieldName”,
“nLiteral”, “nIdent”, “nEList”, “nEConstr”, “nERecUpdate”,
“nERecCons”, “nEBase”, “nEPrefix”, “nERecProj”, “nELazy”, “nEAssert”,
“nEFunapp”, “nEApp”, “nPAny”, “nPList”, “nPPar”, “nPBase”, “nPCons”,
“nPAs”, “nPOps”, “nPattern”, “nPatterns”, “nLetBinding”, “nLetBindings”,
“nLetRecBinding”, “nLetRecBindings”, “nPatternMatches”, “nPatternMatch”,
“nEMatch”, “nETry”, “nEFun”, “nEFunction”, “nELet”, “nELetRec”,
“nEWhile”, “nEFor”, “nEUnclosed”, “nENeg”, “nEShift”, “nEMult”, “nEAdd”,
“nECons”, “nECat”, “nERel”, “nEAnd”, “nEOr”, “nEHolInfix”, “nEProd”,
“nEAssign”, “nEIf”, “nESeq”, “nExpr”, “nTypeDefinition”, “nTVar”,
“nTBase”, “nTConstr”, “nTProd”, “nTFun”, “nType”, “nTypeList”,
“nTypeLists”, “nTypeParams”, “nTypeDef”, “nTypeDefs”, “nConstrDecl”,
“nTypeReprs”, “nTypeRepr”, “nTypeInfo”, “nUpdate”, “nUpdates”,
“nFieldDec”, “nFieldDecs”, “nRecord”, “nConstrArgs”, “nExcDefinition”,
“nTopLet”, “nTopLetRec”, “nOpen”, “nSemis”, “nExprItem”, “nExprItems”,
“nModuleDef”, “nModTypeName”, “nModTypePath”, “nSigSpec”, “nExcType”,
“nValType”, “nOpenMod”, “nIncludeMod”, “nModTypeAsc”, “nModTypeAssign”,
“nSigItem”, “nSigItems”, “nModuleType”, “nModAscApp”, “nModAscApps”,
“nLiteral”, “nIdent”, “nEList”, “nEConstr”, “nERecUpdate”, “nERecCons”,
“nEBase”, “nEPrefix”, “nEIndex”, “nERecProj”, “nELazy”, “nEAssert”,
“nEFunapp”, “nEApp”, “nPAny”, “nPList”, “nPPar”, “nPatLiteral”,
“nPBase”, “nPCons”, “nPAs”, “nPOps”, “nPattern”, “nPatterns”,
“nLetBinding”, “nLetBindings”, “nLetRecBinding”, “nLetRecBindings”,
“nPatternMatches”, “nPatternMatch”, “nEMatch”, “nETry”, “nEFun”,
“nEFunction”, “nELet”, “nELetRec”, “nEWhile”, “nEFor”, “nEUnclosed”,
“nENeg”, “nEShift”, “nEMult”, “nEAdd”, “nECons”, “nECat”, “nERel”,
“nEAnd”, “nEOr”, “nEHolInfix”, “nEProd”, “nEAssign”, “nEIf”, “nESeq”,
“nExpr”, “nTypeDefinition”, “nTVar”, “nTBase”, “nTConstr”, “nTProd”,
“nTFun”, “nType”, “nTypeList”, “nTypeLists”, “nTypeParams”, “nTypeDef”,
“nTypeDefs”, “nConstrDecl”, “nTypeReprs”, “nTypeRepr”, “nTypeInfo”,
“nUpdate”, “nUpdates”, “nArrIdx”, “nStrIdx”, “nFieldDec”, “nFieldDecs”,
“nRecord”, “nConstrArgs”, “nExcDefinition”, “nTopLet”, “nTopLetRec”,
“nOpen”, “nSemis”, “nExprItem”, “nExprItems”, “nModuleDef”,
“nModTypeName”, “nModTypePath”, “nSigSpec”, “nExcType”, “nValType”,
“nOpenMod”, “nIncludeMod”, “nModTypeAsc”, “nModTypeAssign”, “nSigItem”,
“nSigItems”, “nModuleType”, “nModAscApp”, “nModAscApps”,
“nCakeMLPragma”, “nModuleTypeDef”, “nModExpr”, “nDefinition”,
“nDefItem”, “nModuleItem”, “nModuleItems”, “nStart”];

Expand Down
141 changes: 115 additions & 26 deletions compiler/parsing/ocaml/camlPtreeConversionScript.sml
Expand Up @@ -988,16 +988,31 @@ Definition ptree_PPattern_def:
return $ Pp_tannot p ty
od
| _ => fail (locs, «Impossible: nPPar»)
else if nterm = INL nPatLiteral then
case args of
[arg] =>
fmap Pp_lit (ptree_Literal arg) ++
fmap (λb. Pp_con (SOME b) []) (ptree_Bool arg)
| [minus; arg] =>
do
expect_tok minus MinusT;
lf <- destLf arg;
tk <- option $ destTOK lf;
if isInt tk then
return $ Pp_lit $ IntLit $ -(THE $ destInt tk)
else
fail (locs, «Impossible: nPatLiteral»)
od
| _ => fail (locs, «Impossible: nPatLiteral»)
else if nterm = INL nPBase then
case args of
[arg] =>
do
n <- nterm_of arg;
if n = INL nValueName then
fmap Pp_var (ptree_ValueName arg)
else if n = INL nLiteral then
fmap Pp_lit (ptree_Literal arg) ++
fmap (λb. Pp_con (SOME b) []) (ptree_Bool arg)
else if n = INL nPatLiteral then
ptree_PPattern arg
else if n = INL nPAny ∨ n = INL nPList ∨ n = INL nPPar then
ptree_PPattern arg
else
Expand Down Expand Up @@ -1158,33 +1173,42 @@ Definition build_fun_lam_def:
body pats
End

(* This builds the body of a let rec expression out of a list of patterns
* (a list instead of one, because of or-patterns) and an expression body.
*
* TODO
* This is sort-of like build_fun_lam but accepts _lists_ of patterns at
* each level. I think we should replace build_fun_lam with this thing.
*)

(*
each entry is a function name
a list of patterns
a body expression
a letrec must be a function name, a variable name, and a body expression.
there's at least one pattern.
- if the first pattern is not a variable, invent one and immediately match on
it using the first pattern
- if the first pattern is a variable, create a fun binding
* build_letrec is given a list of entries consisting of:
* - a function name
* - a list of patterns
* - a body expression.
*
* A letrec in the CakeML AST is made from a function name, a variable name, and
* a body expression. If there is no pattern in the list given to build_letrec,
* then we use the empty variable name ("") and apply the body expression to
* this variable (like eta expansion). This enables us to write things like:
* let rec f = function ... -> ...
* or
* let rec f x y = ...
* and g = ...
*
* As a consequence, the parser will turn this:
* let rec f x = ...
* and y = 5
* into this:
* let rec f x = ...
* and y x = 5 x
* which is different from what OCaml generates (it correctly binds the value 5
* to y with a let).
*
* Patterns are turned into variables using pattern match expressions:
* - If the first pattern is not a variable, invent a variable name and
* match on it using the first pattern.
* - If the first pattern is a variable, create a lambda.
*)

Definition build_letrec_def:
build_letrec =
MAP (λ(f,ps,x).
case ps of
[] => (f,"", Var (Short " <impossible> "))
[] =>
(f, "", App Opapp [x; Var (Short "")])
| Pvar v::ps =>
(f, v, build_fun_lam x ps)
| p::ps =>
Expand Down Expand Up @@ -1448,13 +1472,29 @@ Definition ptree_Expr_def:
return $ FOLDL build_record_upd e us
od
| _ => fail (locs, «Impossible: nERecUpdate»)
else if nterm = INL nERecProj then
else if nterm = INL nEIndex then
case args of
[arg] => ptree_Expr nEPrefix arg
| [arg; idx] =>
do
pfx <- ptree_Expr nEPrefix arg;
idx_expr <- ptree_Index idx;
case idx_expr of
INL str_idx =>
return $ build_funapp (Var (Long "String" (Short "sub")))
[pfx; str_idx]
| INR arr_idx =>
return $ build_funapp (Var (Long "Array" (Short "sub")))
[pfx; arr_idx]
od
| _ => fail (locs, «Impossible: nEIndex»)
else if nterm = INL nERecProj then
case args of
[arg] => ptree_Expr nEIndex arg
| [arg; dot; fn] =>
do
expect_tok dot DotT;
x <- ptree_Expr nEPrefix arg;
x <- ptree_Expr nEIndex arg;
f <- ptree_FieldName fn;
return $ build_record_proj f x
od
Expand Down Expand Up @@ -1859,6 +1899,22 @@ Definition ptree_Expr_def:
fail (locs, «Or-patterns are not allowed in let (rec) bindings»));
return (nm, FLAT ps, bd)
od
| [id; colon; type; eq; expr] =>
do
expect_tok eq EqualT;
expect_tok colon ColonT;
nm <- ptree_ValueName id;
ty <- ptree_Type type;
bd <- ptree_Expr nExpr expr;
return (nm, [], Tannot bd ty)
od
| [id; eq; expr] =>
do
expect_tok eq EqualT;
nm <- ptree_ValueName id;
bd <- ptree_Expr nExpr expr;
return (nm, [], bd)
od
| _ => fail (locs, «Impossible: nLetRecBinding»)
else
fail (locs, «Expected a let rec binding non-terminal»)) ∧
Expand Down Expand Up @@ -1904,6 +1960,15 @@ Definition ptree_Expr_def:
fail (locs, «Or-patterns are not allowed in let (rec) bindings»));
return $ INR (nm, FLAT ps, bd)
od
| [id; colon; type; eq; bod] =>
do
expect_tok eq EqualT;
expect_tok colon ColonT;
nm <- ptree_ValueName id;
ty <- ptree_Type type;
bd <- ptree_Expr nExpr bod;
return $ INL (Pvar nm, Tannot bd ty)
od
| [id; pats; colon; type; eq; bod] =>
do
expect_tok eq EqualT;
Expand Down Expand Up @@ -2054,7 +2119,30 @@ Definition ptree_Expr_def:
od
| _ => fail (locs, «Impossible: nUpdates»)
else
fail (locs, «Expected an updates non-terminal»))
fail (locs, «Expected an updates non-terminal»)) ∧
(ptree_Index (Lf (_, locs)) =
fail (locs, «Expected an index non-terminal»)) ∧
(ptree_Index (Nd (nterm,locs) args) =
if nterm = INL nArrIdx then
case args of
[dotp; expr; rpar] =>
do
expect_tok dotp DotParenT;
expect_tok rpar RparT;
fmap INR $ ptree_Expr nExpr expr
od
| _ => fail (locs, «Impossible: nArrIdx»)
else if nterm = INL nStrIdx then
case args of
[dotb; expr; rbrack] =>
do
expect_tok dotb DotBrackT;
expect_tok rbrack RbrackT;
fmap INL $ ptree_Expr nExpr expr
od
| _ => fail (locs, «Impossible: nStrIdx»)
else
fail (locs, «Expected an index non-terminal»))
Termination
WF_REL_TAC ‘measure $ sum_size (pair_size camlNT_size psize)
$ sum_size psize
Expand All @@ -2065,6 +2153,7 @@ Termination
$ sum_size psize
$ sum_size (SUC o list_size psize)
$ sum_size (SUC o list_size psize)
$ sum_size psize
$ sum_size psize psize’
\\ simp [parsetree_size_lemma]
End
Expand Down

0 comments on commit a50a290

Please sign in to comment.