# CW2.5:  Parser for FUNC

In this week's assignment, you'll write a parser.

Please submit a .zip file containing this notebook and the file ``CW/func.mll`` on Canvas.

Please ensure that you do not change the name or signature of the functions ``parse_exp``, ``parse_program``, etc. 

##  Parsing 

Below you can see an abstract grammar for the language you've seen before.

In [23]:
(** Expressions: integer literals, identifiers, and function applications. *)
type exp =
  | Numb of int
  | Id of string
  | App of string * exp list (* function name and its argument expressions *)

(** Comparison operators that may head a condition. *)
type bop =
  | Less
  | LessEq
  | Eq
  | NEq

(** A condition: a comparison operator together with its two operands. *)
type cond = C of bop * exp * exp

(** Statements. [If] carries only a then-branch; [Ite] carries a then-branch
    followed by an else-branch. *)
type statement =
  | Assign of string * exp
  | Read of string
  | Write of exp
  | If of cond * statement list
  | Ite of cond * statement list * statement list
  | While of cond * statement list

(** A method definition. *)
type mmethod =
  | M of
      string (* name of function *)
      * string list (* arguments *)
      * string list (* declarations *)
      * statement list (* function body *)
      * string option (* possible return value *)

(** A program is a list of methods. *)
type program = P of mmethod list

type exp = Numb of int | Id of string | App of string * exp list


type bop = Less | LessEq | Eq | NEq


type cond = C of bop * exp * exp


type statement =
    Assign of string * exp
  | Read of string
  | Write of exp
  | If of cond * statement list
  | Ite of cond * statement list * statement list
  | While of cond * statement list


type mmethod =
    M of string * string list * string list * statement list * string option


type program = P of mmethod list


Write a recursive-descent parser for ``FUNC``.
Your parser should contain at least: 
- a function ``parse_exp : token list -> exp * token list``
- a function ``parse_cond : token list -> cond * token list``
- a function ``parse_statement : token list -> statement * token list``
- a function ``parse_program : token list -> program * token list``

You will require more functions. 
You can get partial points by providing e.g. only ``parse_exp``. 

**Hints:** 
- Your parser does **not** have to ensure that variables, functions, the ``main`` function, etc. exist, or that functions are applied to the right number of arguments.
- You will want to test your program step-by-step, e.g. test that ``parse_exp`` runs as expected before writing ``parse_cond``. 
- To write the parser, you'll require a definition of tokens. If you have done CW 2.4, you can use your definition of tokens from before (it will be automatically included by the definition below). If you haven't done CW 2.4/don't plan to do CW 2.4, please write Kathrin a brief e-mail (or ask during labs/the lecture): She will provide you with the definition of a data type of tokens.

In [25]:
(* Code for importing the part of lexing. *)
#require "jupyter.notebook" ;;
open Jupyter_notebook ;;

In [8]:
(* Code for importing your definition of a lexer.
If not implemented, you want to insert here the definition of tokens, 
and instead of the provided tests, test your parser with lists of tokens. *)

(* Run the lexer generator *)
Process.sh "ocamllex func.mll";;

(* Compile and load the file produced by the lexer *)
Process.sh "ocamlc -c func.ml";;
#load "func.cmo";;

(* Convert the buffer into a list for further processing. *)
(* Repeatedly calls [Func.token] on [buffer] and collects the tokens it
   returns, in order, stopping at (and not including) the first [EOF]. *)
let rec stream_to_list buffer =
  match Func.token buffer with
  | Func.EOF -> []
  | tok ->
      let remaining = stream_to_list buffer in
      tok :: remaining

70 states, 4692 transitions, table size 19188 bytes


- : Jupyter_notebook.Process.t =
{Jupyter_notebook.Process.exit_status = Unix.WEXITED 0; stdout = None;
 stderr = None}


- : Jupyter_notebook.Process.t =
{Jupyter_notebook.Process.exit_status = Unix.WEXITED 0; stdout = None;
 stderr = None}


val stream_to_list : Lexing.lexbuf -> Func.token List.t = <fun>


In [4]:
exception SyntaxError of string
open List

(* Optional:
   You might want to write a function to print tokens. 
   Comment out if not needed.  *) 
      
(* [print_token t] renders token [t] as a short string for error messages.
   Tokens that carry a value show it in parentheses; keyword and punctuation
   tokens are shown by their constructor name. Any token not listed here is
   rendered as "<token>". *)
let print_token (t : Func.token) : string = match t with
 | Func.ID name -> "ID(" ^ name ^ ")"
 | Func.INT n -> "INT(" ^ string_of_int n ^ ")"
 | Func.LEFTPARANTHESIS -> "LEFTPARANTHESIS"
 | Func.RIGHTPARANTHESIS -> "RIGHTPARANTHESIS"
 | Func.COMMA -> "COMMA"
 | Func.SEMICOLON -> "SEMICOLON"
 | Func.ASSIGN -> "ASSIGN"
 | Func.LESS -> "LESS"
 | Func.LESSEQ -> "LESSEQ"
 | Func.EQ -> "EQ"
 | Func.NEQ -> "NEQ"
 | Func.IF -> "IF"
 | Func.THEN -> "THEN"
 | Func.ELSE -> "ELSE"
 | Func.ENDIF -> "ENDIF"
 | Func.READ -> "READ"
 | Func.WRITE -> "WRITE"
 | Func.WHILE -> "WHILE"
 | Func.BEGINWHILE -> "BEGINWHILE"
 | Func.ENDWHILE -> "ENDWHILE"
 | Func.EOF -> "EOF"
 | _ -> "<token>" (* remaining tokens of the lexer are not spelled out here *)

(* Renders a token list as the concatenation of each token's [print_token]
   text followed by a single space; the empty list yields "". *)
let print_list (s :  Func.token list) =
  String.concat "" (List.map (fun tok -> print_token tok ^ " ") s)

exception SyntaxError of string


val print_token : Func.token -> string = <fun>


val print_list : Func.token list -> string = <fun>


In [5]:
open Func

In [6]:
(* Token parsing function, ensuring identifiers are handled correctly *)
(* [parse_id ts] expects [ts] to start with an [ID] token and returns the
   identifier's name paired with the tokens that follow it.
   Raises [SyntaxError] when [ts] is empty or starts with any other token. *)
let parse_id : Func.token list -> string * Func.token list = function
  | ID name :: rest -> (name, rest)
  | _ -> raise (SyntaxError "Not an identifier.")
  
(* [parse_token x xs] consumes the expected token [x] from the front of [xs]
   and returns the remaining tokens.
   Raises [SyntaxError] when [xs] is empty or its first token differs from [x].

   Tokens are compared with structural equality [=]. Physical equality [==]
   only happens to work for argument-less constructors and is false for two
   separately built tokens that carry a value (e.g. [ID "x"]). *)
let parse_token (x : token) (xs : token list) = match xs with
 | y :: ys when y = x -> ys
 | _ -> raise (SyntaxError (String.cat "Token expected: " (String.cat (print_token x) (print_list xs))))

val parse_id : Func.token List.t -> string * Func.token list = <fun>


val parse_token : Func.token -> Func.token list -> Func.token list = <fun>


In [36]:
(* [parse_exp ts] parses one expression from the front of [ts] and returns it
   with the unconsumed tokens. An expression is an integer literal, an
   identifier, or an identifier applied to a parenthesised, comma-separated
   (possibly empty) argument list.
   Raises [SyntaxError] if no expression starts at [ts] or an argument list
   is not closed by RIGHTPARANTHESIS. *)
let rec parse_exp (ts : token list) : exp * token list =
  match ts with
  | ID name :: LEFTPARANTHESIS :: RIGHTPARANTHESIS :: rest ->
      (* application with an empty argument list *)
      (App (name, []), rest)
  | ID name :: LEFTPARANTHESIS :: after_paren ->
      let (args, after_args) = parse_exps after_paren in
      (match after_args with
       | RIGHTPARANTHESIS :: rest -> (App (name, args), rest)
       | _ -> raise (SyntaxError "RIGHTPARANTHESIS Expected"))
  | ID name :: rest -> (Id name, rest)
  | INT n :: rest -> (Numb n, rest)
  | _ -> raise (SyntaxError "ID or Int Expected")

(* Helper function for parse_exp: parses a non-empty, comma-separated
   sequence of expressions. *)
and parse_exps (ts : token list) =
  match parse_exp ts with
  | (first, COMMA :: more) ->
      let (others, remaining) = parse_exps more in
      (first :: others, remaining)
  | (only, remaining) -> ([only], remaining)

val parse_exp : Func.token list -> exp * Func.token List.t = <fun>
val parse_exps : Func.token list -> exp List.t * Func.token List.t = <fun>


In [11]:
(* Helper functions for conditional operators *)
(* [parse_bop ts] reads the comparison-operator token at the front of [ts]
   and returns the matching [bop] with the remaining tokens.
   Raises [SyntaxError] if [ts] does not start with such a token. *)
let parse_bop : token list -> bop * token list = function
  | LESS :: rest -> (Less, rest)
  | LESSEQ :: rest -> (LessEq, rest)
  | EQ :: rest -> (Eq, rest)
  | NEQ :: rest -> (NEq, rest)
  | _ -> raise (SyntaxError "Expected LESSEQ, LESS, NEQ, or EQ.")

(* [parse_cond ts] parses a condition of the form
     <bop> LEFTPARANTHESIS <exp> COMMA <exp> RIGHTPARANTHESIS
   (e.g. the tokens of `less(i,y)`) and returns it with the unconsumed tokens.

   Raises [SyntaxError] when the operator, a parenthesis, the comma or either
   operand is missing. A condition always needs both operands, so malformed
   input is rejected rather than turned into a condition over placeholder
   expressions. *)
let parse_cond (ts : token list) : cond * token list =
  let (op, after_op) = parse_bop ts in
  match after_op with
  | LEFTPARANTHESIS :: after_paren ->
      let (lhs, after_lhs) = parse_exp after_paren in
      let after_comma = parse_token COMMA after_lhs in
      let (rhs, after_rhs) = parse_exp after_comma in
      (match after_rhs with
       | RIGHTPARANTHESIS :: rest -> (C (op, lhs, rhs), rest)
       | _ -> raise (SyntaxError "RIGHTPARANTHESIS Expected"))
  | _ -> raise (SyntaxError "LEFTPARANTHESIS Expected")


val parse_bop : Func.token list -> bop * Func.token list = <fun>


val parse_cond : Func.token list -> cond * Func.token list = <fun>


In [18]:
(* [parse_statement ts] parses exactly one statement from the front of [ts],
   dispatching on the first token, and returns it with the unconsumed tokens.
   Raises [SyntaxError] if [ts] does not start a statement or the statement
   is malformed. *)
let rec parse_statement (ts : token list) : statement * token list =
  match ts with
  | ID _ :: _ -> parse_assign ts
  | IF :: _ -> parse_if ts
  | (READ | WRITE) :: _ -> parse_rw ts
  | WHILE :: _ -> parse_while ts
  | _ -> raise (SyntaxError "Statement Expected")

(* Assignment: ID ASSIGN <exp>. *)
and parse_assign (ts : token list) =
  match ts with
  | ID x :: ASSIGN :: ts' ->
      let (e1, ts'') = parse_exp ts' in
      (Assign (x, e1), ts'')
  | _ -> raise (SyntaxError "ID Expected")

(* Conditional: IF <cond> THEN <statements> [ELSE <statements>] ENDIF.
   Yields [Ite] when an else-branch is present and [If] otherwise. *)
and parse_if (ts : token list) =
  let ts' = parse_token IF ts in
  let (c, ts'') = parse_cond ts' in
  let ts''' = parse_token THEN ts'' in
  let (then_branch, xs') = parse_statements ts''' in
  match xs' with
  | ELSE :: xs'' ->
      let (else_branch, ys) = parse_statements xs'' in
      (match ys with
       | ENDIF :: ys' -> (Ite (c, then_branch, else_branch), ys')
       | _ -> raise (SyntaxError "ENDIF Expected"))
  | ENDIF :: xs'' -> (If (c, then_branch), xs'')
  | _ -> raise (SyntaxError "ELSE or ENDIF Expected")

(* Input/output: READ ID, or WRITE <exp>. *)
and parse_rw (ts : token list) =
  match ts with
  | READ :: ID x :: ts' -> (Read x, ts')
  | WRITE :: ts' ->
      let (e1, xs) = parse_exp ts' in
      (Write e1, xs)
  | _ -> raise (SyntaxError "READ or WRITE Expected")

(* Loop: WHILE <cond> BEGINWHILE <statements> ENDWHILE. *)
and parse_while (ts : token list) =
  let ts' = parse_token WHILE ts in
  let (c, ts'') = parse_cond ts' in
  let xs = parse_token BEGINWHILE ts'' in
  let (body, xs') = parse_statements xs in
  let xs'' = parse_token ENDWHILE xs' in
  (While (c, body), xs'')

(* [parse_statements ts] parses a non-empty sequence of statements.
   A SEMICOLON after a statement is consumed; parsing continues only if the
   token after it can start another statement. This accepts the example
   programs, where the last statement of a block is also terminated by ';'
   (e.g. `i := plus(i,1); endwhile`), while still accepting ';' used purely
   as a separator. *)
and parse_statements (ts : token list) : statement list * token list =
  let (first, after_first) = parse_statement ts in
  match after_first with
  | SEMICOLON :: ((ID _ | IF | READ | WRITE | WHILE) :: _ as more) ->
      let (others, rest) = parse_statements more in
      (first :: others, rest)
  | SEMICOLON :: rest ->
      (* trailing semicolon: the block ends here *)
      ([first], rest)
  | _ -> ([first], after_first)


val parse_statement : Func.token list -> statement * Func.token list = <fun>
val parse_assign : Func.token list -> statement * Func.token List.t = <fun>
val parse_if : Func.token list -> statement * Func.token list = <fun>
val parse_rw : Func.token list -> statement * Func.token List.t = <fun>
val parse_while : Func.token list -> statement * Func.token list = <fun>
val parse_statements : Func.token list -> statement list * Func.token list =
  <fun>


In [21]:
(* [parse_args ts] parses a non-empty, comma-separated list of identifiers
   from the front of [ts] and returns their names, in order, together with
   the unconsumed tokens. Raises [SyntaxError] (from [parse_id]) when an
   identifier is expected but not found. *)
let rec parse_args (ts: Func.token list) =
  match parse_id ts with
  | (name, COMMA :: more) ->
      let (names, remaining) = parse_args more in
      (name :: names, remaining)
  | (name, remaining) -> ([name], remaining)
 
(* Parses a comma-separated list of identifiers from the front of [ts] and
   returns their names together with the unconsumed tokens. Raises
   [SyntaxError] (via [parse_id]) when [ts] does not start with an identifier.
   NOTE(review): despite its name, this does not build a [program] value or
   handle method definitions; it mirrors [parse_args]. That is presumably why
   the example cell below fails at runtime, since the example programs start
   with the `method` keyword rather than an identifier. TODO: confirm and
   implement method parsing. *)
let rec parse_program (ts: Func.token list) = 
  let (ts', rest) = parse_id ts in  (* [ts'] is the identifier's name, [rest] the remaining tokens *)
  match rest with
  | COMMA :: xs -> 
      let (y, xs') = parse_args xs in
      (ts' :: y, xs')  (* Prepend the first name to the names parsed after the comma *)
  | _ -> ([ts'], rest)  (* No comma follows: return the single name *)

val parse_args : Func.token list -> string List.t * Func.token list = <fun>


val parse_program : Func.token list -> string List.t * Func.token list =
  <fun>


## Appendix - Example Programs

In [22]:
(* Example 1: a [pow] method containing a while loop and a return, followed
   by a [main] method containing an if/else. *)
let ex1 = "method pow(x, y) vars i, res
begin

	res := x;
	i := 1;
	while less(i,y)
	begin
		res := times(res,x);
		i := plus(i,1);
        endwhile;
	write res;
	return res;

endmethod;

method main() vars a, b, x
begin

	a := 5;
	b := 2;
	x := pow(b,a);
	if eq(x,32)
		 then write 1;
	else
		write 0;
	endif;

endmethod;
"    

(* Example 2: like example 1, but with nested function applications in an
   assignment and multi-statement branches in an if/else. *)
let ex2 = "method pow(x,y) vars i, res,w
begin

	res := x(da,da(1,2,m(1,1)),1);
	i := 2;
	if eq(x,32) then 
		write 1;
		read a;
	else
		b := 11;
	endif;
	while less(i,y)
	begin
		res := times(res,x);
		i := plus(i,1);
        endwhile;
	write res;
	return res;

endmethod;

method main() vars a, b, x
begin

	a := 5; 
	b := 2;
	x := pow(b,a);
	if eq(x,32)
		 then write 1; 
	else 
		write 0;
	endif; 
endmethod;"

(* Example 3: a single [main] method with read, a while loop and write;
   it has no return statement. *)
let ex3 = "method main() vars inp, res
begin
read inp;
res:=0;
while less(0,inp)
begin
res := plus(res,inp);
inp := minus(inp,1);
endwhile;
write res;
endmethod;
"

(* Example 4: a [sum] method with a while loop and a return, called from
   [main]. *)
let ex4 = "method sum(inp) vars res
begin
res:=0;
while less(0,inp)
begin
res := plus(res,inp);
inp := minus(inp,1);
endwhile;
return res;
endmethod;

method main() vars inp,res
begin
read inp;
res := sum(inp);
write res;
endmethod;"

(* Example 5: a [sum] method that calls itself inside an if/else and has no
   return statement, called from [main]. *)
let ex5 = "method sum(inp) vars tmp
begin
if eq(inp,0) then
res := inp;
else
tmp := sum(minus(inp,1));
res := plus(tmp,inp);
endif;
endmethod;

method main() vars inp,res
begin
read inp;
res := sum(inp);
write res;
endmethod;"

(* Lexes the program text [ex] into a token list and passes it to
   [parse_program]. *)
let text_to_ast ex =
  ex |> Lexing.from_string |> stream_to_list |> parse_program

(* Parse each example; compare the results with what you expect. *)
let parsed1 = text_to_ast ex1
let parsed2 = text_to_ast ex2
let parsed3 = text_to_ast ex3
let parsed4 = text_to_ast ex4
let parsed5 = text_to_ast ex5

val ex1 : string =
  "method pow(x, y) vars i, res\nbegin\n\n\tres := x;\n\ti := 1;\n\twhile less(i,y)\n\tbegin\n\t\tres := times(res,x);\n\t\ti := plus(i,1);\n        endwhile;\n\twrite res;\n\treturn res;\n\nendmethod;\n\nmethod main() vars a, b, x\nbegin\n\n\ta := 5;\n\tb := 2;\n\tx := pow(b,a);\n\tif eq(x,32)\n\t\t then write 1;\n\telse\n\t\twrite 0;\n\tendif;"... (* string length 312; truncated *)


val ex2 : string =
  "method pow(x,y) vars i, res,w\nbegin\n\n\tres := x(da,da(1,2,m(1,1)),1);\n\ti := 2;\n\tif eq(x,32) then \n\t\twrite 1;\n\t\tread a;\n\telse\n\t\tb := 11;\n\tendif;\n\twhile less(i,y)\n\tbegin\n\t\tres := times(res,x);\n\t\ti := plus(i,1);\n        endwhile;\n\twrite res;\n\treturn res;\n\nendmethod;\n\nmethod main() vars a, b, x\nbegin\n\n\t"... (* string length 401; truncated *)


val ex3 : string =
  "method main() vars inp, res\nbegin\nread inp;\nres:=0;\nwhile less(0,inp)\nbegin\nres := plus(res,inp);\ninp := minus(inp,1);\nendwhile;\nwrite res;\nendmethod;\n"


val ex4 : string =
  "method sum(inp) vars res\nbegin\nres:=0;\nwhile less(0,inp)\nbegin\nres := plus(res,inp);\ninp := minus(inp,1);\nendwhile;\nreturn res;\nendmethod;\n\nmethod main() vars inp,res\nbegin\nread inp;\nres := sum(inp);\nwrite res;\nendmethod;"


val ex5 : string =
  "method sum(inp) vars tmp\nbegin\nif eq(inp,0) then\nres := inp;\nelse\ntmp := sum(minus(inp,1));\nres := plus(tmp,inp);\nendif;\nendmethod;\n\nmethod main() vars inp,res\nbegin\nread inp;\nres := sum(inp);\nwrite res;\nendmethod;"


val text_to_ast : string -> string List.t * Func.token list = <fun>


error: runtime_error