# Lecture 4 - Parsers

The code below implements a simple recognizer for the grammar:

```
A ::= id | B
B ::= int | (A)
```

In [1]:
(** Tokens consumed by the recognizer for the grammar
    [A ::= id | B], [B ::= int | (A)]. *)
type token =
  | ID of string  (** identifier, carrying its name *)
  | INT of int    (** integer literal, carrying its value *)
  | LBRA          (** opening bracket, written "(" in the grammar *)
  | RBRA          (** closing bracket, written ")" in the grammar *)
  | EOF           (** end-of-input marker; declared but not matched by
                      [parseA]/[parseB] in this cell *)

(** Raised when a token stream does not match the grammar; the payload is a
    human-readable description of what was expected. *)
exception SyntaxError of string

(** [parseA ts] recognizes one [A] ([A ::= id | B]) at the front of [ts] and
    returns the tokens that follow it.
    @raise SyntaxError if [ts] does not start with an [A]. *)
let rec parseA (ts : token list) : token list =
  match ts with
  | ID _ :: rest -> rest
  | _ -> parseB ts (* anything that is not an identifier must be a [B] *)

(* [parseB ts] recognizes one [B] ([B ::= int | (A)]) at the front of [ts] and
   returns the tokens that follow it; raises [SyntaxError] otherwise. *)
and parseB (ts : token list) : token list =
  match ts with
  | INT _ :: rest -> rest
  | LBRA :: inner ->
    (* After the nested [A], the very next token has to close the bracket. *)
    begin match parseA inner with
    | RBRA :: rest -> rest
    | _ -> raise (SyntaxError "Expected RBRA.")
    end
  | _ -> raise (SyntaxError "Expected INT or LBRA")

type token = ID of string | INT of int | LBRA | RBRA | EOF


exception SyntaxError of string


val parseA : token list -> token list = <fun>
val parseB : token list -> token list = <fun>


In [2]:
parseA [LBRA; INT 4; RBRA]

- : token list = []


In [3]:
parseA [ID "x"; LBRA; RBRA]

- : token list = [LBRA; RBRA]


In [4]:
parseA [RBRA; INT 4]

error: runtime_error

In [5]:
parseB [ID "x"]

error: runtime_error

In [6]:
parseB [LBRA; LBRA; INT 3; RBRA; RBRA]

- : token list = []


This is easily extended to a full parser which also yields a parse tree: 

In [7]:
(** Concrete parse tree: one constructor per production of
    [A ::= id | B], [B ::= int | (A)]. *)
type parsetree =
  | A1 of token                      (** A ::= id *)
  | A2 of parsetree                  (** A ::= B *)
  | B1 of token                      (** B ::= int *)
  | B2 of token * parsetree * token  (** B ::= ( A ), brackets included *)

(** [parseA ts] parses one [A] ([A ::= id | B]) at the front of [ts].
    Returns the unconsumed tokens paired with the parse tree of the [A].
    @raise SyntaxError if [ts] does not start with an [A]. *)
let rec parseA (ts : token list) : token list * parsetree =
  match ts with
  | ID x :: rest -> (rest, A1 (ID x))
  | _ ->
    (* Anything that is not an identifier must be a [B]; wrap its tree. *)
    let rest, tree = parseB ts in
    (rest, A2 tree)

(* [parseB ts] parses one [B] ([B ::= int | (A)]) at the front of [ts] and
   returns the unconsumed tokens paired with its parse tree; raises
   [SyntaxError] otherwise. *)
and parseB (ts : token list) : token list * parsetree =
  match ts with
  | INT n :: rest -> (rest, B1 (INT n))
  | LBRA :: inner ->
    begin match parseA inner with
    | (RBRA :: rest, tree) -> (rest, B2 (LBRA, tree, RBRA))
    (* The message names the actual token constructor, [RBRA], matching the
       wording used by the plain recognizer for the same grammar. *)
    | _ -> raise (SyntaxError "Expected RBRA.")
    end
  | _ -> raise (SyntaxError "Expected INT or LBRA")

type parsetree =
    A1 of token
  | A2 of parsetree
  | B1 of token
  | B2 of token * parsetree * token


val parseA : token list -> token list * parsetree = <fun>
val parseB : token list -> token list * parsetree = <fun>


In [8]:
parseA [LBRA; INT 4; RBRA]

- : token list * parsetree = ([], A2 (B2 (LBRA, A2 (B1 (INT 4)), RBRA)))


In [9]:
parseA [ID "x"; LBRA; RBRA]

- : token list * parsetree = ([LBRA; RBRA], A1 (ID "x"))


In [10]:
parseA [RBRA; INT 4]

error: runtime_error

In [11]:
parseB [ID "x"]

error: runtime_error

In [12]:
parseB [LBRA; LBRA; INT 3; RBRA; RBRA]

- : token list * parsetree =
([], B2 (LBRA, A2 (B2 (LBRA, A2 (B1 (INT 3)), RBRA)), RBRA))


## Example Recursive Descent Recognizer

Consider the following concrete grammar for lists of ints separated by semicolons:

```
L := INT R
R := ; L | ε
```

In [13]:
(** Tokens for the grammar [L := INT R], [R := ; L | ε]. *)
type token =
  | INT   (** an integer; no value is carried *)
  | SEMI  (** the ";" separator *)
  | EOF   (** end-of-input marker; not matched by the parsers in this cell *)

(** [parse_token x xs] consumes the expected token [x] from the front of [xs]
    and returns the remaining tokens.

    Tokens are compared with structural equality ([=]). Physical equality
    ([==]) only happens to agree for payload-free constructors and would stop
    working if a token ever carried data.
    @raise SyntaxError if [xs] is empty or does not start with [x]. *)
let parse_token (x : token) (xs : token list) =
  match xs with
  | y :: ys when y = x -> ys
  | _ -> raise (SyntaxError "Token expected.")

(** [parseL ts] recognizes [L := INT R]: one [INT] followed by an [R].
    Returns the tokens left after the list; [parse_token] raises if the
    leading [INT] is missing. *)
let rec parseL ts = ts |> parse_token INT |> parseR

(* [parseR ts] recognizes [R := ; L | ε]: a [SEMI] must be followed by another
   [L]; any other input matches the empty alternative and is returned as is. *)
and parseR = function
  | SEMI :: rest -> parseL rest
  | rest -> rest

type token = INT | SEMI | EOF


val parse_token : token -> token list -> token list = <fun>


val parseL : token list -> token list = <fun>
val parseR : token list -> token list = <fun>


In [14]:
parseL [ INT; SEMI; INT]

- : token list = []


In [15]:
parseL [INT; SEMI]

error: runtime_error

## Example Left Recursion 

See the following left-recursive grammar for non-empty lists: 

```
L ::= [int C]
C ::=  C ; int  | ε
```

We can define a recursive-descent parser...

In [16]:
(** Tokens for the left-recursive list grammar
    [L ::= [int C]], [C ::= C ; int | ε]. *)
type token =
  | LBRA  (** opening bracket of the list *)
  | RBRA  (** closing bracket of the list *)
  | INT   (** an integer; no value is carried *)
  | SEMI  (** the ";" separator *)
  | EOF   (** end-of-input marker; not matched by the parsers in this cell *)

(** [parse_token x xs] checks that [xs] starts with the token [x] and returns
    the tokens after it.

    The comparison uses structural equality ([=]) rather than physical
    equality ([==]); the two agree for the payload-free tokens used here, but
    [=] is the comparison that is actually meant.
    @raise SyntaxError if [xs] is empty or its first token differs from [x]. *)
let parse_token (x : token) (xs : token list) =
  match xs with
  | y :: ys when y = x -> ys
  | _ -> raise (SyntaxError "Token expected.")

(** [parseL ts] recognizes [L ::= [int C]]: an opening bracket, one mandatory
    [INT], a [C], and a closing bracket. Returns the tokens after the list.

    The mandatory [INT] is consumed here before [parseC] runs, as the
    production requires; without it even [[LBRA; INT; RBRA]] is rejected.
    [parse_token] raises when an expected token is missing. *)
let rec parseL ts =
  ts |> parse_token LBRA |> parse_token INT |> parseC |> parse_token RBRA

(* [parseC ts] is the direct transcription of the left-recursive production
   [C ::= C ; int | ε].

   NOTE: this is intentionally left as is for the demonstration. On a leading
   [SEMI] the recursive call receives the unchanged [ts], so no token is
   consumed before recursing and the call never returns a result. *)
and parseC ts =
  match ts with
  | SEMI :: _ -> parse_token INT (parse_token SEMI (parseC ts))
  | _ -> ts

type token = LBRA | RBRA | INT | SEMI | EOF


val parse_token : token -> token list -> token list = <fun>


val parseL : token list -> token list = <fun>
val parseC : token list -> token list = <fun>


... but running it leads to divergence: 

In [17]:
parseC [SEMI; INT; SEMI; INT]

error: runtime_error

## List Recognizer

See the following concrete grammar for lists: 

```
L ::= (  [C]  )
C ::= int [;  C] 
```

We can define the recognizer as follows:

In [18]:
(** Tokens for the list grammar [L ::= ( [C] )], [C ::= int [; C]]. *)
type token =
  | LBRA  (** opening bracket, written "(" in the grammar *)
  | RBRA  (** closing bracket, written ")" in the grammar *)
  | INT   (** an integer; no value is carried *)
  | SEMI  (** the ";" separator *)
  | EOF   (** end-of-input marker; not matched by the parsers in this cell *)

(** [parse_token x xs] removes the expected token [x] from the front of [xs]
    and returns what is left.

    Tokens are compared structurally with [=]. Physical equality ([==]) gives
    the same answer for these payload-free constructors, but it is not the
    comparison that is meant and breaks once tokens carry data.
    @raise SyntaxError if [xs] is empty or does not begin with [x]. *)
let parse_token (x : token) (xs : token list) =
  match xs with
  | y :: ys when y = x -> ys
  | _ -> raise (SyntaxError "Token expected.")

(** [parseL ts] recognizes [L ::= ( [C] )]: an opening bracket, an optional
    [C], and a closing bracket. Returns the tokens after the list;
    [parse_token] raises when an expected token is missing. *)
let rec parseL ts =
  let after_open = parse_token LBRA ts in
  match after_open with
  | RBRA :: rest -> rest (* empty list: the optional [C] is absent *)
  | body -> body |> parseC |> parse_token RBRA

(* [parseC ts] recognizes [C ::= int [; C]]: one [INT], then, if a [SEMI]
   follows, another [C]. Returns the tokens after the last element. *)
and parseC ts =
  let after_int = parse_token INT ts in
  match after_int with
  | SEMI :: rest -> parseC rest
  | rest -> rest

type token = LBRA | RBRA | INT | SEMI | EOF


val parse_token : token -> token list -> token list = <fun>


val parseL : token list -> token list = <fun>
val parseC : token list -> token list = <fun>


In [19]:
parseL [LBRA; INT; SEMI; INT; RBRA]

- : token list = []


## Expressions 

In the lecture you have learned how to get a concrete grammar for expressions: 
```
exp := term [{+|-} exp]
term := base [{*|/} term]
base := id | int | (exp)

```

Below you can find the corresponding parser:

In [20]:
(** Binary arithmetic operators. *)
type op =
  | Plus
  | Minus
  | Mult
  | Div

(** Abstract syntax of expressions. *)
type exp =
  | Id of string         (** variable reference *)
  | Numb of int          (** integer literal *)
  | Op of exp * op * exp (** binary operation: left operand, operator, right operand *)

(** Tokens of the expression language. *)
type token =
  | ID of string  (** identifier, carrying its name *)
  | INT of int    (** integer literal, carrying its value *)
  | PLUS          (** parsed as [Plus] *)
  | MINUS         (** parsed as [Minus] *)
  | STAR          (** parsed as [Mult] *)
  | SLASH         (** parsed as [Div] *)
  | LBRA          (** opening bracket, written "(" in the grammar *)
  | RBRA          (** closing bracket, written ")" in the grammar *)

(** [parse_token x xs] consumes the expected token [x] from the front of [xs]
    and returns the remaining tokens.

    Tokens are compared with structural equality ([=]). This token type has
    payload-carrying constructors ([ID], [INT]), for which physical equality
    ([==]) can reject two separately built but equal tokens.
    @raise SyntaxError if [xs] is empty or does not start with [x]. *)
let parse_token (x : token) (xs : token list) =
  match xs with
  | y :: ys when y = x -> ys
  | _ -> raise (SyntaxError "Token expected.")

           
(** [parse_exp xs] parses [exp := term [{+|-} exp]] at the front of [xs].
    Returns the expression together with the unconsumed tokens.

    The right operand is parsed by a recursive call to [parse_exp], so a chain
    such as [a - b - c] is built as [Op (a, Minus, Op (b, Minus, c))].
    @raise SyntaxError if the input does not start with an expression. *)
let rec parse_exp (xs : token list) : exp * token list =
  let lhs, rest = parse_term xs in
  (* Parse the right operand from [tail] and combine it with [lhs]. *)
  let continue_with operator tail =
    let rhs, remaining = parse_exp tail in
    (Op (lhs, operator, rhs), remaining)
  in
  match rest with
  | PLUS :: tail -> continue_with Plus tail
  | MINUS :: tail -> continue_with Minus tail
  | _ -> (lhs, rest)

(* [parse_term xs] parses [term := base [{*|/} term]]; same shape as
   [parse_exp], one precedence level tighter, recursing on the right. *)
and parse_term (xs : token list) : exp * token list =
  let lhs, rest = parse_base xs in
  let continue_with operator tail =
    let rhs, remaining = parse_term tail in
    (Op (lhs, operator, rhs), remaining)
  in
  match rest with
  | STAR :: tail -> continue_with Mult tail
  | SLASH :: tail -> continue_with Div tail
  | _ -> (lhs, rest)

(* [parse_base xs] parses [base := id | int | (exp)]. A bracketed expression
   returns the inner expression itself; no node is created for the brackets. *)
and parse_base (xs : token list) : exp * token list =
  match xs with
  | ID name :: rest -> (Id name, rest)
  | INT n :: rest -> (Numb n, rest)
  | LBRA :: rest ->
    let inner, after_exp = parse_exp rest in
    (inner, parse_token RBRA after_exp)
  | _ -> raise (SyntaxError "Expected ID, INT or LBRA.")

type op = Plus | Minus | Mult | Div


type exp = Id of string | Numb of int | Op of exp * op * exp


type token =
    ID of string
  | INT of int
  | PLUS
  | MINUS
  | STAR
  | SLASH
  | LBRA
  | RBRA


val parse_token : token -> token list -> token list = <fun>


val parse_exp : token list -> exp * token list = <fun>
val parse_term : token list -> exp * token list = <fun>
val parse_base : token list -> exp * token list = <fun>


In [21]:
parse_exp [LBRA; INT 3; PLUS; ID "x"; RBRA; STAR; LBRA; INT 5; MINUS; INT 2; RBRA]

- : exp * token list =
(Op (Op (Numb 3, Plus, Id "x"), Mult, Op (Numb 5, Minus, Numb 2)), [])
