# Code Generation

The following code allows us to use the previously defined lexer/parser:

In [1]:
(* Load the Jupyter notebook helper library and open it, so that [Process.sh]
   can be called unqualified below. *)
#require "jupyter.notebook" ;;

open Jupyter_notebook;;

(* Generate the lexer source simp.ml from simp.mll with ocamllex, then compile it. *)
Process.sh "ocamllex simp.mll";;
Process.sh "ocamlc -c simp.ml";;

(* Load psimp.ml into the toplevel.
   NOTE(review): later cells use [op], [exp], [cmd], [program], [Env] and
   [parse_string], none of which are defined in this notebook; presumably they
   come from this file - confirm. *)
#use "psimp.ml"

/home/opam/.opam/4.13/lib/base64: added to search path
/home/opam/.opam/4.13/lib/base64/base64.cma: loaded
/home/opam/.opam/4.13/lib/ocaml/compiler-libs: added to search path
/home/opam/.opam/4.13/lib/ocaml/compiler-libs/ocamlcommon.cma: loaded
/home/opam/.opam/4.13/lib/seq: added to search path
/home/opam/.opam/4.13/lib/yojson: added to search path
/home/opam/.opam/4.13/lib/yojson/yojson.cma: loaded
/home/opam/.opam/4.13/lib/ppx_yojson_conv_lib: added to search path
/home/opam/.opam/4.13/lib/ppx_yojson_conv_lib/ppx_yojson_conv_lib.cma: loaded
/home/opam/.opam/4.13/lib/ocaml/unix.cma: loaded
/home/opam/.opam/4.13/lib/bytes: added to search path
/home/opam/.opam/4.13/lib/uuidm: added to search path
/home/opam/.opam/4.13/lib/uuidm/uuidm.cma: loaded
/home/opam/.opam/4.13/lib/jupyter: added to search path
/home/opam/.opam/4.13/lib/jupyter/jupyter.cma: loaded
/home/opam/.opam/4.13/lib/result: added to search path
/home/opam/.opam/4.13/lib/result/result.cma: loaded
/home/opam/.opam/4.13/lib/

error: runtime_error

## Representing MIPS Code in OCaml

We define registers as a type synonym for ``int``, and define named aliases for the register mnemonics:

In [2]:
(* A register is identified by its MIPS register number, 0 to 31. *)
type register = int

(* $v0-$v1: values returned by a subroutine. *)
let (v0 : register), (v1 : register) = (2, 3)

(* $a0-$a3: arguments to a subroutine. *)
let (a0 : register), (a1 : register), (a2 : register), (a3 : register) =
  (4, 5, 6, 7)

(* $t0-$t7: temporary registers. *)
let (t0 : register), (t1 : register), (t2 : register), (t3 : register) =
  (8, 9, 10, 11)
let (t4 : register), (t5 : register), (t6 : register), (t7 : register) =
  (12, 13, 14, 15)

(* $s0-$s7: saved registers. *)
let (s0 : register), (s1 : register), (s2 : register), (s3 : register) =
  (16, 17, 18, 19)
let (s4 : register), (s5 : register), (s6 : register), (s7 : register) =
  (20, 21, 22, 23)

(* $t8 and $t9: temporaries that will be used for intermediate results. *)
let (t8 : register), (t9 : register) = (24, 25)

(* $sp (stack pointer), $fp (frame pointer) and $ra (return address). *)
let (sp : register), (fp : register), (ra : register) = (29, 30, 31)

type register = int


val v0 : register = 2


val v1 : register = 3


val a0 : register = 4


val a1 : register = 5


val a2 : register = 6


val a3 : register = 7


val t0 : register = 8


val t1 : register = 9


val t2 : register = 10


val t3 : register = 11


val t4 : register = 12


val t5 : register = 13


val t6 : register = 14


val t7 : register = 15


val s0 : register = 16


val s1 : register = 17


val s2 : register = 18


val s3 : register = 19


val s4 : register = 20


val s5 : register = 21


val s6 : register = 22


val s7 : register = 23


val t8 : register = 24


val t9 : register = 25


val sp : register = 29


val fp : register = 30


val ra : register = 31


Instructions are represented as an algebraic data type (an OCaml variant type).

See https://www.dsi.unive.it/~gasparetto/materials/MIPS_Instruction_Set.pdf for full explanation.

In [3]:
(* Assembly labels are plain strings. *)
type label = string

(* Instructions are represented as a variant type, one constructor per MIPS
   (pseudo-)instruction. In the comments below, $1, $2 and $3 stand for the
   constructor's register arguments in order, 100 for an integer argument and
   l for a label argument. *)
type instruction =
  (* add $1, $2, $3 : $1 = $2 + $3 *)
  | Add of register * register * register
  (* sub $1, $2, $3 : $1 = $2 - $3 *)
  | Sub of register * register * register
  (* addi $1, $2, 100 : $1 = $2 + 100; "immediate" means a constant number *)
  | Addi of register * register * int
  (* addiu $1, $2, 100 : $1 = $2 + 100; like addi but never traps on overflow
     (the immediate may still be negative, as in [push]) *)
  | Addiu of register * register * int
  (* mul $1, $2, $3 : $1 = $2 * $3, without overflow; result is only 32 bits *)
  | Mul of register * register * register
  (* div $1, $2 : lo = $1 / $2 (quotient), hi = remainder; hi and lo are
     special registers read back with mfhi / mflo *)
  | Div of register * register
  (* and $1, $2, $3 : $1 = $2 & $3, bitwise AND *)
  | And of register * register * register
  (* or $1, $2, $3 : $1 = $2 | $3, bitwise OR *)
  | Or of register * register * register
  (* andi $1, $2, 100 : $1 = $2 & 100, bitwise AND with an immediate value *)
  | Andi of register * register * int
  (* ori $1, $2, 100 : $1 = $2 | 100, bitwise OR with an immediate value *)
  | Ori of register * register * int
  (* lw $1, 100($2) : load word, $1 = Memory[$2 + 100] *)
  | Lw of register * int * register
  (* sw $1, 100($2) : store word, Memory[$2 + 100] = $1 *)
  | Sw of register * int * register
  (* la $1, l : $1 = address of label l *)
  | La of register * label
  (* li $1, 100 : load the immediate value into the register *)
  | Li of register * int
  (* move $1, $2 : $1 = $2, copy from register to register *)
  | Move of register * register
  (* mfhi $1 : $1 = hi, copy from special register hi *)
  | Mfhi of register
  (* mflo $1 : $1 = lo, copy from special register lo *)
  | Mflo of register
  (* l: - a label definition *)
  | Label of label
  (* beq $1, $2, l : if ($1 == $2) go to label l *)
  | Beq of register * register * string
  (* bne $1, $2, l : if ($1 != $2) go to label l *)
  | Bne of register * register * string
  (* bgt $1, $2, l : if ($1 > $2) go to label l *)
  | Bgt of register * register * string
  (* blt $1, $2, l : if ($1 < $2) go to label l *)
  | Blt of register * register * string
  (* bge $1, $2, l : if ($1 >= $2) go to label l *)
  | Bge of register * register * string
  (* ble $1, $2, l : if ($1 <= $2) go to label l *)
  | Ble of register * register * string
  (* j l : jump to label l *)
  | J of label
  (* jr $1 : jump register, go to the address stored in $1 *)
  | Jr of register
  (* jal l : jump and link, $ra = PC + 4 then go to label l; used for
     procedure calls, since it saves the return address in $ra *)
  | Jal of label
  (* syscall *)
  | SysCall
  (* a line of assembly text emitted as-is *)
  | Verbatim of string

type label = string


type instruction =
    Add of register * register * register
  | Sub of register * register * register
  | Addi of register * register * int
  | Addiu of register * register * int
  | Mul of register * register * register
  | Div of register * register
  | And of register * register * register
  | Or of register * register * register
  | Andi of register * register * int
  | Ori of register * register * int
  | Lw of register * int * register
  | Sw of register * int * register
  | La of register * label
  | Li of register * int
  | Move of register * register
  | Mfhi of register
  | Mflo of register
  | Label of label
  | Beq of register * register * string
  | Bne of register * register * string
  | Bgt of register * register * string
  | Blt of register * register * string
  | Bge of register * register * string
  | Ble of register * register * string
  | J of label
  | Jr of register
  | Jal of label
  | SysCall
  | Verbatim of string


MIPS code is simply a list of instructions:

In [4]:
(* A piece of MIPS code: the instructions in the order they are emitted. *)
type code = instruction list

type code = instruction list


The following code prints instructions so that they are readable by the MIPS interpreter (https://cpulator.01xz.net/?sys=mipsr5b-spim).

In [5]:
(* [print_register r] is the assembly name of register [r]: "$v0".."$v1",
   "$a0".."$a3", "$t0".."$t9", "$s0".."$s7", "$sp", "$fp" or "$ra".
   Any other number is printed numerically, e.g. "$26". *)
let print_register (r : register) =
  (* Name of a register inside a numbered family, e.g. family "a" starting at a0. *)
  let indexed family first = "$" ^ family ^ string_of_int (r - first) in
  if r >= 2 && r <= 3 then indexed "v" v0
  else if r >= 4 && r <= 7 then indexed "a" a0
  else if r >= 8 && r <= 15 then indexed "t" t0
  else if r >= 16 && r <= 23 then indexed "s" s0
  else
    match r with
    | 24 -> "$t8"
    | 25 -> "$t9"
    | 29 -> "$sp"
    | 30 -> "$fp"
    | 31 -> "$ra"
    | _ -> "$" ^ string_of_int r

(* [print_instruction i] renders instruction [i] as one line of MIPS assembly
   text (without a trailing newline). Registers are printed with
   [print_register], immediates and offsets as decimal integers, labels as-is.
   [Label l] becomes "l:" and [Verbatim s] is returned unchanged.

   Fix: [Andi] and [Ori] carry an integer immediate as third argument; it is
   now printed as a number instead of being passed to [print_register]
   (which rendered e.g. the immediate 4 as "$a0"). *)
let print_instruction (i : instruction) =
  let reg = print_register in
  (* "op $1, $2, $3" *)
  let three_regs mnemonic r1 r2 r3 =
    mnemonic ^ " " ^ reg r1 ^ ", " ^ reg r2 ^ ", " ^ reg r3
  in
  (* "op $1, $2, 100" - the last operand is a plain integer *)
  let reg_reg_imm mnemonic r1 r2 imm =
    mnemonic ^ " " ^ reg r1 ^ ", " ^ reg r2 ^ ", " ^ string_of_int imm
  in
  (* "op $1, $2, l" *)
  let branch mnemonic r1 r2 target =
    mnemonic ^ " " ^ reg r1 ^ ", " ^ reg r2 ^ ", " ^ target
  in
  (* "op $1, 100($2)" *)
  let memory mnemonic r1 offset base =
    mnemonic ^ " " ^ reg r1 ^ ", " ^ string_of_int offset ^ "(" ^ reg base ^ ")"
  in
  match i with
  | Add (r1, r2, r3) -> three_regs "add" r1 r2 r3
  | Sub (r1, r2, r3) -> three_regs "sub" r1 r2 r3
  | Mul (r1, r2, r3) -> three_regs "mul" r1 r2 r3
  | And (r1, r2, r3) -> three_regs "and" r1 r2 r3
  | Or (r1, r2, r3) -> three_regs "or" r1 r2 r3
  | Addi (r1, r2, imm) -> reg_reg_imm "addi" r1 r2 imm
  | Addiu (r1, r2, imm) -> reg_reg_imm "addiu" r1 r2 imm
  | Andi (r1, r2, imm) -> reg_reg_imm "andi" r1 r2 imm
  | Ori (r1, r2, imm) -> reg_reg_imm "ori" r1 r2 imm
  | Div (r1, r2) -> "div " ^ reg r1 ^ ", " ^ reg r2
  | Beq (r1, r2, l) -> branch "beq" r1 r2 l
  | Bne (r1, r2, l) -> branch "bne" r1 r2 l
  | Bgt (r1, r2, l) -> branch "bgt" r1 r2 l
  | Blt (r1, r2, l) -> branch "blt" r1 r2 l
  | Bge (r1, r2, l) -> branch "bge" r1 r2 l
  | Ble (r1, r2, l) -> branch "ble" r1 r2 l
  | Li (r, imm) -> "li " ^ reg r ^ ", " ^ string_of_int imm
  | La (r, l) -> "la " ^ reg r ^ ", " ^ l
  | Lw (r1, offset, r2) -> memory "lw" r1 offset r2
  | Sw (r1, offset, r2) -> memory "sw" r1 offset r2
  | Move (r1, r2) -> "move " ^ reg r1 ^ ", " ^ reg r2
  | Mfhi r -> "mfhi " ^ reg r
  | Mflo r -> "mflo " ^ reg r
  | SysCall -> "syscall"
  | Label l -> l ^ ":"
  | J label -> "j " ^ label
  | Jr r -> "jr " ^ reg r
  | Jal label -> "jal " ^ label
  | Verbatim s -> s
   
(* [print_code c] writes every instruction of [c] to standard output, one per
   line, in list order. *)
let print_code (c : code) : unit =
  List.iter (fun instr -> print_endline (print_instruction instr)) c

val print_register : register -> string = <fun>


val print_instruction : instruction -> string = <fun>


val print_code : code -> unit = <fun>


E.g., you can load the following code into the MIPS interpreter (https://cpulator.01xz.net/?sys=mipsr5b-spim). Try it out!

In [6]:
(* Print a three-instruction sample program.
   NOTE(review): the add reads $t9, which this snippet never loads, while the
   value loaded into $t7 is unused - possibly a typo in the example; confirm. *)
[Li (t7, 5); Li (t8, 4); Add (t8, t8, t9)] |> print_code

li $t7, 5
li $t8, 4
add $t8, $t8, $t9


- : unit = ()


## Compiling Expressions

The following code implements push/pop operations:

In [7]:
(* Exception raised by the code generator, carrying a message, when it meets a
   construct it cannot compile (e.g. the "Not known." / "Not implemented."
   cases of the compile functions). *)
exception E of string

(* [push r] is the code that pushes the content of register [r] onto the
   stack: move the stack pointer down by 4, then store [r] at the new top. *)
let push (r : register) : code =
  let grow_stack = Addiu (sp, sp, -4) in
  let store_top = Sw (r, 0, sp) in
  [grow_stack; store_top]

(* [pop r] is the code that pops the top of the stack into register [r]:
   load the word at the stack pointer, then move the stack pointer up by 4. *)
let pop (r : register) : code =
  let load_top = Lw (r, 0, sp) in
  let shrink_stack = Addiu (sp, sp, 4) in
  [load_top; shrink_stack]

exception E of string


val push : register -> code = <fun>


val pop : register -> code = <fun>


We can define compilation of expressions by traversing the abstract syntax tree:

In [8]:
(* [compile_op o r1 r2 r] is the code for binary operator [o] with destination
   [r1] and operands [r2] and [r]. Only [Plus] and [Minus] are handled here.
   Raises [E "Not known."] for any other operator. *)
let compile_op (o : op) r1 r2 r =
  match o with
  | Minus -> [Sub (r1, r2, r)]
  | Plus -> [Add (r1, r2, r)]
  | _ -> raise (E "Not known.")

(* [compile_exp r e] is code that evaluates expression [e] and leaves its value
   in register [r]. For a binary operation the left operand is computed into
   $t8 and saved on the stack while the right operand is computed into $t9;
   $t8 is then restored and the operator applied.
   Raises [E "not known"] for expressions other than [Numb] and [Op]. *)
let rec compile_exp (r : register) (e : exp) : code =
  match e with
  | Op (lhs, operator, rhs) ->
      List.concat
        [ compile_exp t8 lhs;
          push t8;
          compile_exp t9 rhs;
          pop t8;
          compile_op operator r t8 t9 ]
  | Numb n -> [Li (r, n)]
  | _ -> raise (E "not known");;

error: compile_error

In [9]:
(* Compile 3 + (5 + 4) with the result in $s0 and print the generated code. *)
let sum = Op (Numb 3, Plus, Op (Numb 5, Plus, Numb 4)) in
print_code (compile_exp s0 sum);;

error: compile_error

## Variables and Assignments

In [10]:
(* Exception for environment errors, carrying a message; [declare_var] raises
   it when no register is left for a new variable. *)
exception EEnv of string

(* Highest register number that may be assigned to a variable (23 is [s7]). *)
let maxreg = 23

(* [find_max_register env] is the largest register number bound in [env], or
   [s0 - 1] when [env] is empty (so the first declared variable gets [s0]). *)
let find_max_register (env : int Env.t) =
  let below_first = s0 - 1 in
  Env.fold (fun _name reg highest -> max reg highest) env below_first


(* [declare_var env x] binds variable [x] to the register following the
   largest one already used in [env] and returns the extended environment.
   Raises [EEnv "Too many variables"] when the largest register in use has
   already reached [maxreg]. *)
let declare_var (env: int Env.t) (x:string) : int Env.t =
  let highest = find_max_register env in
  if highest >= maxreg then raise (EEnv "Too many variables")
  else Env.add x (1 + highest) env

exception EEnv of string


val maxreg : int = 23


error: compile_error

In [11]:
(* Declare "b" and then "a" in an empty environment and look both up:
   "b", declared first, gets register 16 and "a" gets register 17. *)
let env = declare_var (declare_var Env.empty "b") "a" in
Env.find "b" env;;

let env = declare_var (declare_var Env.empty "b") "a" in
Env.find "a" env;;

error: compile_error

In [12]:
(* [compile_op o r1 r2 r] is the single instruction for binary operator [o]
   with destination [r1] and operands [r2] and [r]. Only [Plus] and [Minus]
   are handled. Raises [E "Not implemented."] for any other operator. *)
let compile_op (o : op) r1 r2 r =
  match o with
  | Minus -> Sub (r1, r2, r)
  | Plus -> Add (r1, r2, r)
  | _ -> raise (E "Not implemented.")

(* [compile_exp env r e] is code that evaluates [e] and leaves its value in
   register [r]. Variables are read from the register bound to them in [env]
   ([Env.find] raises if the name is unbound); negation is compiled as
   0 - e; a binary operation computes its left operand into $t8, saves it on
   the stack while the right operand is computed into $t9, then restores $t8
   and applies the operator. *)
let rec compile_exp env (r : register) (e : exp)  : code =
  match e with
  | Numb n -> [Li (r, n)]
  | Id name -> [Move (r, Env.find name env)]
  | Neg operand -> compile_exp env r (Op (Numb 0, Minus, operand))
  | Op (lhs, operator, rhs) ->
      List.concat
        [ compile_exp env t8 lhs;
          push t8;
          compile_exp env t9 rhs;
          pop t8;
          [compile_op operator r t8 t9] ];;

error: compile_error

Compiling expressions: An expression will be translated into a sequence of instructions. 
This will create a value that we store in the given register.

In [13]:
(* [compile_dcls env dcls] declares every variable name of [dcls], left to
   right, starting from [env], and returns the resulting environment. *)
let compile_dcls env (dcls : string list)  =
  List.fold_left declare_var env dcls

(* [compile_cmd env c] is the code for command [c]. Only assignment is
   supported here: the expression is evaluated directly into the register
   bound to the assigned variable. Raises [E "Not implemented."] otherwise. *)
let compile_cmd env (c : cmd) : code =
  match c with
  | Asgn (target, value) ->
      let dest = Env.find target env in
      compile_exp env dest value
  | _ -> raise (E "Not implemented.")

(* [compile_cmds env cs] is the concatenation, in order, of the code of every
   command in [cs]. *)
let rec compile_cmds env (cs : cmd list) : code =
  match cs with
  | first :: rest -> compile_cmd env first @ compile_cmds env rest
  | [] -> []
    

(* [compile_program p] is the complete code for program [p]: assembler
   directives and the "_start" label, the code of the commands (compiled in
   the environment built from the declarations), then [Li (v0, 10)] followed
   by a syscall. *)
let compile_program (p : program) =
  match p with
  | Program (dcls, cmds) ->
      let env = compile_dcls Env.empty dcls in
      let header =
        [Verbatim ".set noreorder"; Verbatim ".text"; Label "_start"]
      in
      let footer = [Li (v0, 10); SysCall] in
      header @ compile_cmds env cmds @ footer

error: compile_error

In [14]:
(* Sample program: five variables, four constant assignments and one nested sum. *)
let p = "
VAR A;
VAR B;
VAR C;
VAR D;
VAR Z; 
A := 128; 
B := 64;
C := 32;
D := 16; 
Z := (A + B) + (C + D)
"


(* Parse the sample, compile it and print the generated code. *)
let mips = p |> parse_string |> compile_program |> print_code;;

val p : string =
  "\nVAR A;\nVAR B;\nVAR C;\nVAR D;\nVAR Z; \nA := 128; \nB := 64;\nC := 32;\nD := 16; \nZ := (A + B) + (C + D)\n"


error: compile_error

## Compiling SIMP

See the slides for an explanation.

In [15]:
(* Global counter behind [next_val]; starts at 0. *)
let counter : int ref = ref 0

(* [next_val ()] increments [counter] and returns its new value, so successive
   calls yield 1, 2, 3, ... - a fresh number every time. *)
let next_val () =
  incr counter;
  !counter;;

(* [compile_op o r1 r2 r] is the code for binary operator [o], where [r1] is
   the destination register and [r2], [r] are the left and right operands
   (it is called as [compile_op o dest t8 t9]).

   Fix: division previously emitted [Div (r1, r2); Mflo r], i.e. it divided
   the destination by the left operand and moved the quotient into the right
   operand register. It now divides [r2] by [r] and moves the quotient from
   lo into the destination [r1], consistent with the other operators. *)
let compile_op (o : op) r1 r2 r = match o with
    | Plus -> [Add (r1, r2, r)]
    | Minus -> [Sub (r1, r2, r)]
    | Mult -> [Mul (r1, r2, r)]
    | Div -> [Div (r2, r); Mflo r1]

(* [compile_exp env r e] is code that evaluates [e] and leaves its value in
   register [r]. Variables are read from the register bound to them in [env]
   ([Env.find] raises if the name is unbound); negation is compiled as
   0 - e; a binary operation computes its left operand into $t8, saves it on
   the stack while the right operand is computed into $t9, then restores $t8
   and applies the operator. *)
let rec compile_exp env (r : register) (e : exp) : code =
  match e with
  | Numb n -> [Li (r, n)]
  | Id name -> [Move (r, Env.find name env)]
  | Neg operand -> compile_exp env r (Op (Numb 0, Minus, operand))
  | Op (lhs, operator, rhs) ->
      List.concat
        [ compile_exp env t8 lhs;
          push t8;
          compile_exp env t9 rhs;
          pop t8;
          compile_op operator r t8 t9 ];;


(* [compile_branch o r1 r2 label] is the branch instruction that jumps to
   [label] when the comparison [r1 o r2] does NOT hold: each comparison
   operator is mapped to the branch on the opposite condition. *)
let compile_branch o r1 r2 label =
  match o with
  | Eq -> Bne (r1, r2, label)
  | Neq -> Beq (r1, r2, label)
  | Lt -> Bge (r1, r2, label)
  | Gte -> Blt (r1, r2, label)
  | Gt -> Ble (r1, r2, label)
  | Lte -> Bgt (r1, r2, label)
    
(* [compile_cond env o e1 e2 label] is code that evaluates [e1] into $t8 and
   [e2] into $t9 (saving $t8 on the stack in between) and then emits the
   branch produced by [compile_branch o t8 t9 label]. *)
let compile_cond env o e1 e2 label =
  List.concat
    [ compile_exp env t8 e1;
      push t8;
      compile_exp env t9 e2;
      pop t8;
      [compile_branch o t8 t9 label] ]
    

(* [compile_dcls env dcls] declares every variable name of [dcls], left to
   right, starting from [env], and returns the resulting environment. *)
let compile_dcls env dcls = List.fold_left declare_var env dcls

(* [compile_cmd env c] is the MIPS code for command [c], with variables
   resolved to registers through [env] ([Env.find] raises if a name is
   unbound). Fresh label names are built from [next_val ()], which advances a
   global counter.
   NOTE: sub-commands are compiled inside [@] chains, whose operand evaluation
   order OCaml leaves unspecified, so the exact numbers given to nested or
   sibling labels should not be relied upon - only their uniqueness.
   NOTE(review): the inner matches on the condition only handle [Cop];
   confirm that the condition type has no other constructor. *)
let rec compile_cmd env (c : cmd) : code = match c with 
    (* IF without ELSE: the condition code branches to the end label when the
       comparison fails (see [compile_branch]), skipping the body. *)
    | If (e, c) -> (let label = "IFEND" ^ string_of_int (next_val ()) in
                    match e with 
                    | Cop (e1, o, e2) -> compile_cond env o e1 e2 label
                                        @ compile_cmd env c
                                        @ [Label label])
    (* IF/ELSE: a failed comparison branches to the false label; the true
       branch ends with a jump over the false branch. The local [counter]
       below is a string and shadows the global counter within this arm. *)
    | Ite (e, c1, c2) -> (let counter = string_of_int (next_val ()) in
                            let label_end = "IFEND" ^ counter in
                            let label_false = "IFFALSE" ^ counter in
                match e with 
                | Cop (e1, o, e2) -> compile_cond env o e1 e2 label_false
                                    @ compile_cmd env c1
                                    @ [J label_end]
                                    @ [Label label_false]
                                    @ compile_cmd env c2
                                    @ [Label label_end])
    (* WHILE: the condition is re-evaluated at the loop label on every
       iteration; a failed comparison branches to the end label, and the body
       ends with a jump back to the loop label. *)
    | While (e, c) -> ( let counter = string_of_int (next_val ()) in
                        let label_loop = "WLOOP" ^ counter in
                        let label_end = "WEND" ^ counter in
                match e with 
                | Cop (e1, o, e2) -> [Label label_loop]
                                    @ compile_cond env o e1 e2 label_end
                                    @ compile_cmd env c
                                    @ [J label_loop]
                                    @ [Label label_end])
    (* Assignment: evaluate the expression straight into the variable's register. *)
    | Asgn (s, e) -> compile_exp env (Env.find s env) e
    (* Block: its declarations extend the environment for the block's own
       commands only; the caller's [env] is unchanged. *)
    | Begin (Program (dcls, cmds)) -> let env' = compile_dcls env dcls in
                                      compile_cmds env' cmds 
    (* PRINT: v0 = 1 with the value in $a0, then syscall (service 1 is
       print-integer in the SPIM syscall convention). *)
    | Print e -> [Li (v0, 1)] 
                 @ compile_exp env a0 e
                 @ [SysCall]
    (* INPUT: syscall with v0 = 4 and $a0 = address of the "sinp" string
       (print-string in the SPIM convention), then syscall with v0 = 5
       (read-integer), whose result in $v0 is copied to the variable's register. *)
    | Input s -> let s_register = Env.find s env in 
                 [Li (v0, 4); La (a0, "sinp"); SysCall; Li (v0, 5); SysCall; Move (s_register, v0)]

(* [compile_cmds env cs] is the concatenation, in order, of the code of every
   command in [cs]. *)
and compile_cmds env (cs : cmd list) : code = match cs with 
    | [] -> [] 
    | c :: cs -> compile_cmd env c @ compile_cmds env cs
    


(* [compile_program p] is the complete code for program [p]: a data section
   defining the "sinp" prompt string used by INPUT, the text section starting
   at "_start", the code of the commands (compiled in the environment built
   from the declarations), then [Li (v0, 10)] followed by a syscall. *)
let compile_program (p : program) =
  match p with
  | Program (dcls, cmds) ->
      let env = compile_dcls Env.empty dcls in
      let prologue =
        [ Verbatim ".set noreorder";
          Verbatim ".data";
          Label "sinp";
          Verbatim ".asciiz \"INPUT>\"  ";
          Verbatim ".text";
          Label "_start" ]
      in
      let epilogue = [Li (v0, 10); SysCall] in
      prologue @ compile_cmds env cmds @ epilogue
        

val counter : int ref = {contents = 0}


val next_val : unit -> int = <fun>


error: compile_error

In [16]:
(* Sample program: a one-armed IF that updates MAX when A is larger. *)
let example_if = "
VAR A; 
VAR MAX;
IF A > MAX THEN MAX := A
"

(* Parse the sample, compile it and print the generated code. *)
let mips = example_if |> parse_string |> compile_program |> print_code;;

val example_if : string = "\nVAR A; \nVAR MAX;\nIF A > MAX THEN MAX := A\n"


error: compile_error

In [17]:
(* Sample program: IF/ELSE storing the larger of A and B in MAX. *)
let example_ite = "
VAR A; 
VAR B;
VAR MAX;
IF A > B THEN MAX := A ELSE MAX := B
"

(* Parse the sample, compile it and print the generated code. *)
let mips = example_ite |> parse_string |> compile_program |> print_code;;

val example_ite : string =
  "\nVAR A; \nVAR B;\nVAR MAX;\nIF A > B THEN MAX := A ELSE MAX := B\n"


error: compile_error

In [18]:
(* Sample program: IF/ELSE spread over several lines, with initialised variables. *)
let example_ite2 = "
VAR x; VAR y; VAR max;
x := 3; 
y := 5;
IF x > y
THEN 
    max := x
ELSE 
    max := y
"

(* Parse the sample, compile it and print the generated code. *)
let mips = example_ite2 |> parse_string |> compile_program |> print_code;;

val example_ite2 : string =
  "\nVAR x; VAR y; VAR max;\nx := 3; \ny := 5;\nIF x > y\nTHEN \n    max := x\nELSE \n    max := y\n"


error: compile_error

In [19]:
(* Sample program: nested BEGIN/END blocks that redeclare A and B, to exercise
   the scoping of declarations. *)
let example_block = "
VAR A; VAR B; VAR C; 
A := 1; B := 2; C := 3; 
BEGIN 
    VAR A; VAR B; 
    A := 4; B := 5; C := 6; 
    BEGIN 
        VAR A; 
        A := 7; 
        B := 8; 
        C := 9 
    END; 
    A := 10; B:= 11; C := 12
END; 
A := 13; B := 14; C := 15
"

(* Parse the sample, compile it and print the generated code. *)
let mips = example_block |> parse_string |> compile_program |> print_code;;

val example_block : string =
  "\nVAR A; VAR B; VAR C; \nA := 1; B := 2; C := 3; \nBEGIN \n    VAR A; VAR B; \n    A := 4; B := 5; C := 6; \n    BEGIN \n        VAR A; \n        A := 7; \n        B := 8; \n        C := 9 \n    END; \n    A := 10; B:= 11; C := 12\nEND; \nA := 13; B := 14; C := 15\n"


error: compile_error

In [22]:
(* Sample program: a WHILE loop that prints n and counts it down from 5.
   The cell previously contained this definition and the compile statement
   twice (the second compile fused onto the same line), so the program was
   compiled and printed twice; the duplicate has been removed. *)
let example_while = "
VAR n; 
n := 5; 
WHILE n > 0 DO
BEGIN 
PRINT n; 
n := n - 1
END
"

(* Parse the sample, compile it and print the generated code. *)
let mips =  (print_code (compile_program (parse_string example_while)));;

val example_while : string =
  "\nVAR n; \nn := 5; \nWHILE n > 0 DO\nBEGIN \nPRINT n; \nn := n - 1\nEND\n"


val example_while : string =
  "\nVAR n; \nn := 5; \nWHILE n > 0 DO\nBEGIN \nPRINT n; \nn := n - 1\nEND\n"


error: compile_error

In [21]:
(* Sample program: read x from the user and print its square. *)
let example_input = "
VAR x; 
INPUT x; 
PRINT (x * x)
"

(* Parse the sample, compile it and print the generated code. *)
let mips = example_input |> parse_string |> compile_program |> print_code;;

val example_input : string = "\nVAR x; \nINPUT x; \nPRINT (x * x)\n"


error: compile_error