diff --git a/compiler/bootstrap/translation/lexerProgScript.sml b/compiler/bootstrap/translation/lexerProgScript.sml index b83dda5f4b..6ce7573a9f 100644 --- a/compiler/bootstrap/translation/lexerProgScript.sml +++ b/compiler/bootstrap/translation/lexerProgScript.sml @@ -49,50 +49,50 @@ fun def_of_const tm = let val _ = (find_def_for_const := def_of_const); val _ = translate get_token_eqn + +val _ = translate (next_token_def |> SIMP_RULE std_ss [next_sym_eq]) + val _ = translate lexer_fun_def -(* - TODO: - unhex_side unprovable because UNHEX isn't defined for a bunch of inputs - num_from_dec_string_side is unprovable because of unhex_side -*) val l2n_side = prove(`` ∀b a. a ≠ 0 ⇒ l2n_side a b``, Induct>> rw[Once (fetch"-""l2n_side_def")]) -val num_from_dec_string_side = prove(`` - ∀x. num_from_dec_string_side x ⇔ T``, - simp[Once (fetch"-""num_from_dec_string_side_def")]>>rw[] +val num_from_dec_string_alt_side = prove(`` + ∀x. num_from_dec_string_alt_side x ⇔ T``, + simp[Once (fetch"-""num_from_dec_string_alt_side_def")]>> + strip_tac>>CONJ_TAC >- simp[Once (fetch"-""s2n_side_def"),l2n_side] >> - cheat) + simp[Once (fetch"-""unhex_alt_side_def"),Once (fetch"-""unhex_side_def"),isDigit_def]>>Cases>> + strip_tac>> + `48 ≤ n ∧ n ≤ 57` by + fs[ORD_CHR]>> + `n = 48 ∨ n = 49 ∨ n = 50 ∨ + n = 51 ∨ n = 52 ∨ n = 53 ∨ + n = 54 ∨ n = 55 ∨ n = 56 ∨ n = 57` by + DECIDE_TAC>> + fs[]); val read_string_side = prove(`` ∀x y. read_string_side x y ⇔ T``, ho_match_mp_tac read_string_ind>> rw[]>> - simp[Once (fetch"-""read_string_side_def")]) + simp[Once (fetch"-""read_string_side_def")]); -val next_sym_side = prove(`` - ∀x. next_sym_side x ⇔ T``, - ho_match_mp_tac next_sym_ind>>rw[]>> - simp[Once (fetch"-""next_sym_side_def")]>> - rw[]>> - simp[read_string_side]>> - (* - The following are all num_from_dec_string_side cases - read_while isDigit should provide strong enough preconditions - *) - cheat) +val next_sym_alt_side = prove(`` + ∀x. next_sym_alt_side x ⇔ T``, + ho_match_mp_tac next_sym_alt_ind>>rw[]>> + simp[Once (fetch"-""next_sym_alt_side_def"),num_from_dec_string_alt_side,read_string_side]>> + rw[]>>metis_tac[]); val lexer_fun_side = prove(`` ∀x. lexer_fun_side x ⇔ T``, ho_match_mp_tac lexer_fun_ind>>rw[]>> simp[Once (fetch"-""lexer_fun_side_def"), - Once (fetch"-""next_token_side_def"), - next_sym_side]) |> update_precondition + Once (fetch"-""next_token_side_def"),next_sym_alt_side]) |> update_precondition val _ = export_theory(); diff --git a/compiler/parsing/lexer_implScript.sml b/compiler/parsing/lexer_implScript.sml index d7219837c2..f230e1f77d 100644 --- a/compiler/parsing/lexer_implScript.sml +++ b/compiler/parsing/lexer_implScript.sml @@ -152,6 +152,113 @@ val get_token_eqn = Q.store_thm ("get_token_eqn", val _ = computeLib.add_persistent_funs(["get_token_eqn"]); +val unhex_alt_def = Define` + unhex_alt x = (if isDigit x then UNHEX x else 0n)` + +val num_from_dec_string_alt_def = Define `num_from_dec_string_alt = s2n 10 unhex_alt`; + +val next_sym_alt_def = tDefine "next_sym_alt"` + (next_sym_alt "" = NONE) /\ + (next_sym_alt (c::str) = + if isSpace c then (* skip blank space *) + next_sym_alt str + else if isDigit c then (* read number *) + let (n,rest) = read_while isDigit str [] in + SOME (NumberS (&(num_from_dec_string_alt (c::n))), rest) + else if c = #"~" /\ str <> "" /\ isDigit (HD str) then (* read negative number *) + let (n,rest) = read_while isDigit str [] in + SOME (NumberS (0- &(num_from_dec_string_alt n)), rest) + else if c = #"'" then (* read type variable *) + let (n,rest) = read_while isAlphaNumPrime str [c] in + SOME (OtherS n, rest) + else if c = #"\"" then (* read string *) + let (t,rest) = read_string str "" in + SOME (t, rest) + else if isPREFIX "*)" (c::str) then + SOME (ErrorS, TL str) + else if isPREFIX "#\"" (c::str) then + let (t,rest) = read_string (TL str) "" in + SOME (mkCharS t, rest) + else if isPREFIX "(*" (c::str) then + case skip_comment (TL str) 0 of + | NONE => SOME (ErrorS, "") + | SOME rest => next_sym_alt rest + else if is_single_char_symbol c then (* single character tokens, i.e. delimiters *) + SOME (OtherS [c], str) + else if isSymbol c then + let (n,rest) = read_while isSymbol str [c] in + SOME (OtherS n, rest) + else if isAlpha c then (* read identifier *) + let (n,rest) = read_while isAlphaNumPrime str [c] in + case rest of + #"."::rest' => + (case rest' of + c'::rest' => + if isAlpha c' then + let (n', rest'') = read_while isAlphaNumPrime rest' [c'] in + SOME (LongS (n ++ "." ++ n'), rest'') + else if isSymbol c' then + let (n', rest'') = read_while isSymbol rest' [c'] in + SOME (LongS (n ++ "." ++ n'), rest'') + else + SOME (ErrorS, rest') + | "" => SOME (ErrorS, [])) + | _ => SOME (OtherS n, rest) + else if c = #"_" then SOME (OtherS "_", str) + else (* input not recognised *) + SOME (ErrorS, str))` + (WF_REL_TAC `measure LENGTH` THEN REPEAT STRIP_TAC + THEN IMP_RES_TAC (GSYM read_while_thm) + THEN IMP_RES_TAC (GSYM read_string_thm) + THEN IMP_RES_TAC skip_comment_thm THEN Cases_on `str` + THEN FULL_SIMP_TAC (srw_ss()) [LENGTH] THEN DECIDE_TAC) + +val EVERY_isDigit_imp = prove(`` + EVERY isDigit x ⇒ + MAP UNHEX x = MAP unhex_alt x``, + rw[]>>match_mp_tac LIST_EQ>>fs[EL_MAP,EVERY_EL,unhex_alt_def]) + +val toNum_rw = prove(`` + ∀x. EVERY isDigit x ⇒ + toNum x = num_from_dec_string_alt x``, + rw[ASCIInumbersTheory.s2n_def,ASCIInumbersTheory.num_from_dec_string_def,num_from_dec_string_alt_def]>> + AP_TERM_TAC>> + match_mp_tac EVERY_isDigit_imp>> + metis_tac[rich_listTheory.EVERY_REVERSE]) + +val EVERY_IMPLODE = prove(`` + ∀ls P. + EVERY P (IMPLODE ls) ⇔ EVERY P ls``, + Induct>>fs[]) + +val read_while_P_lem = prove(`` + ∀ls rest P x y. + EVERY P rest ∧ + read_while P ls rest = (x,y) ⇒ + EVERY P x``, + Induct>>fs[read_while_def]>>rw[]>> + fs[EVERY_IMPLODE,rich_listTheory.EVERY_REVERSE]>> + first_assum match_mp_tac>>fs[]>> + qexists_tac`STRING h rest`>>fs[]) + +val read_while_P = prove(`` + ∀ls P x y. + read_while P ls "" = (x,y) ⇒ + EVERY P x``, + rw[]>>ho_match_mp_tac read_while_P_lem>> + MAP_EVERY qexists_tac [`ls`,`""`,`y`]>>fs[]) + +val next_sym_eq = store_thm("next_sym_eq", + ``∀x. next_sym x = next_sym_alt x``, + ho_match_mp_tac next_sym_ind>>fs[next_sym_def,next_sym_alt_def]>>rw[]>> + TRY(BasicProvers.TOP_CASE_TAC>>fs[]>>NO_TAC)>> + TRY(rpt(pop_assum mp_tac)>> EVAL_TAC>> simp[]>>NO_TAC)>> + pairarg_tac>>fs[]>> + match_mp_tac toNum_rw>> + fs[]>> + ho_match_mp_tac read_while_P>> + metis_tac[]); + (* lex_until_toplevel_semicolon *) val lex_aux_def = tDefine "lex_aux" ` diff --git a/semantics/lexer_funScript.sml b/semantics/lexer_funScript.sml index c7d8fc8865..f801cda397 100644 --- a/semantics/lexer_funScript.sml +++ b/semantics/lexer_funScript.sml @@ -33,7 +33,7 @@ val read_while_def = Define ` if P c then read_while P cs (c :: s) else (IMPLODE (REVERSE s),STRING c cs))`; -val read_while_thm = prove( +val read_while_thm = store_thm("read_while_thm", ``!cs s cs' s'. (read_while P cs s = (s',cs')) ==> STRLEN cs' <= STRLEN cs``, Induct THEN SRW_TAC [][read_while_def] THEN SRW_TAC [][] THEN @@ -60,7 +60,7 @@ val read_string_def = tDefine "read_string" ` (WF_REL_TAC `measure (LENGTH o FST)` THEN REPEAT STRIP_TAC THEN Cases_on `str` THEN FULL_SIMP_TAC (srw_ss()) [] THEN DECIDE_TAC) -val read_string_thm = prove( +val read_string_thm = store_thm("read_string_thm", ``!s t x1 x2. (read_string s t = (x1,x2)) ==> (LENGTH x2 <= LENGTH s + LENGTH t)``, ONCE_REWRITE_TAC [EQ_SYM_EQ] @@ -85,7 +85,7 @@ val skip_comment_def = Define ` if [x;y] = "*)" then (if d = 0 then SOME xs else skip_comment xs (d-1)) else skip_comment (y::xs) d)` -val skip_comment_thm = prove( +val skip_comment_thm = store_thm("skip_comment_thm", ``!xs d str. (skip_comment xs d = SOME str) ==> LENGTH str <= LENGTH xs``, ONCE_REWRITE_TAC [EQ_SYM_EQ] THEN HO_MATCH_MP_TAC (fetch "-" "skip_comment_ind") THEN REPEAT STRIP_TAC