#### Annotating RISC-V Code

**Note:** This notebook needs to be run in a folder with the P0 compiler.

For the following questions, first study `CGriscv.ipynb`, in particular the parts on procedure calling and array indexing.

In [None]:
import nbimporter; nbimporter.options["only_defs"] = False
from P0 import compileString

Generate the RISC-V assembly code of the following P0 program:

In [None]:
# Compile the P0 program below to RISC-V assembly in the file 'printsum.s'.
# The program stores 5 and 7 in a[0] and a[1], calls sum(0, 1) to add
# a[0] and a[1] into the global variable s, and then writes s.
compileString("""
var s: integer
var a: [0 .. 9] → integer
procedure sum(l, h: integer)
  var i: integer
    s := 0; i := l
    while i ≤ h do
        s := s + a[i]; i := i + 1
program printsum
  a[0] := 5; a[1] := 7
  sum(0, 1)
  write(s)
""", 'printsum.s', target = 'riscv')

We can view the RISC-V assembly code:

In [None]:
!cat printsum.s

You can execute it with a RISC-V emulator library in Python:

Note that the riscemu library has issues with the standard method `read()` in P0; you have to execute it from the command line.

In [None]:
!python3 -m riscemu printsum.s

Annotate each line of the generated RISC-V file with an assignment or a brief description of what the instruction does, using register/variable names rather than offsets and addresses. For each procedure (`sum` and `printsum`), state the size of the parameters (`parsize`) and the size of the local variables (`localsize`). State for each local variable and parameter the offset used for FP-relative addressing.

Note that `write` is a system call, `scall`, with the function to be executed, `1`, in `a0` and the parameter in `a1`.  `4` in `a2` means write 4 bytes. The P0 code generator unnecessarily pushes the parameter also on the stack.

YOUR ANSWER HERE

<pre style="font-family:monospace;color:royalblue">
    .data
s_: .space 4         ; global variable s (4 bytes)
a_: .space 40        ; global variable a (40 bytes)
	.text
	.globl sum
sum:                 ; parsize = 8 (2 params: l, h), localsize = 4 (1 local: i)
	addi sp, sp, -32 ; SP := SP - n, n = 32
                     ; (stack pointer is always kept 16-byte aligned)
                     ; n = (8 + parsize + localsize) rounded up
                     ; to the nearest multiple of 16
	sw ra, 20(sp)    ; save ra at SP + parsize + 12
	sw s0, 16(sp)    ; save s0/FP at SP + parsize + 8
	addi s0, sp, 24  ; s0/FP := SP + parsize + 16 (= SP at the call - 8)
                     ; s, a are global variables at s_, a_
                     ; frame layout, from higher to lower addresses (stack grows downwards):
                     ; l at FP + 4         (stored here by the caller)
                     ; h at FP + 0         (stored here by the caller)
                     ; old ra at FP - 4
                     ; old s0/FP at FP - 8
                     ; i at FP - 12
	la s10, s_       ; s10 := address of global variable s
	sw zero, 0(s10)  ; s := 0
	lw s8, 4(s0)     ; s8 := l
	sw s8, -12(s0)   ; i := s8
L1:	                 ; while-loop test
	lw s11, -12(s0)  ; s11 := i
	lw s9, 0(s0)     ; s9 := h
	blt s9, s11, L2  ; if s9 < s11, i.e. h < i, then PC := L2 (leave the loop)
L3:	                 ; while-loop body
	lw s7, -12(s0)   ; s7 := i
	addi s3, zero, 4 ; s3 := 4 (size of an array element in bytes)
	mul s7, s7, s3   ; s7 := s7 * s3 (byte offset of a[i])
	la s6, s_        ; s6 := address of s
	lw s5, 0(s6)     ; s5 := s
	lw s4, a_(s7)    ; s4 := a[i], i.e. Mem[a_ + s7]
	add s5, s5, s4   ; s5 := s5 + s4
	la s2, s_        ; s2 := address of s
	sw s5, 0(s2)     ; s := s5
	lw s10, -12(s0)  ; s10 := i
	addi s10, s10, 1 ; s10 := s10 + 1
	sw s10, -12(s0)  ; i := s10
	j L1             ; PC := L1 (repeat the loop test)
L2:	                 ; loop exit, procedure epilogue
	lw ra, 20(sp)    ; restore ra from SP + parsize + 12
	lw s0, 16(sp)    ; restore s0/FP from SP + parsize + 8
	addi sp, sp, 32  ; SP := SP + n (release the frame)
	ret              ; PC := ra
	.globl main
main:	
	jal ra, printsum ; call printsum: ra := PC + 4, PC := printsum
	addi a0, zero, 0     ; a0 := 0 (argument of the exit call, presumably the exit status)
	addi a7, zero, 93 ; a7 := 93, selects sysexit
	scall             ; perform the system call (sysexit)
	.globl printsum
printsum:             ; parsize = 0, localsize = 0
	addi sp, sp, -16  ; SP := SP - n, n = 16 (8 + parsize + localsize = 8, rounded up to a multiple of 16)
	sw ra, 12(sp)     ; save ra at SP + parsize + 12
	sw s0, 8(sp)      ; save s0/FP at SP + parsize + 8
	addi s0, sp, 16   ; s0/FP := SP + parsize + 16 (= SP at the call)
	addi s8, zero, 5  ; s8 := 5
	la s11, a_        ; s11 := address of a
	sw s8, 0(s11)     ; a[0] := s8, i.e. Mem[a_ + 0] := s8
	addi s9, zero, 7  ; s9 := 7
	la s7, a_         ; s7 := address of a
	sw s9, 4(s7)      ; a[1] := s9, i.e. Mem[a_ + 4] := s9
	sw zero, -4(sp)   ; pass 1st argument of sum: Mem[SP - 4] := 0 (parameter l)
	addi s3, zero, 1  ; s3 := 1
	sw s3, -8(sp)     ; pass 2nd argument of sum: Mem[SP - 8] := s3 (parameter h)
	jal ra, sum       ; call sum: ra := PC + 4, PC := sum
	la s6, s_         ; s6 := address of s
	lw s2, 0(s6)      ; s2 := s
	sw s2, -4(sp)     ; Mem[SP - 4] := s2 (parameter of write, unnecessarily also put on the stack)
	mv s5, a0         ; s5 := a0 (save a0 across the system call)
	la s4, s_         ; s4 := address of s
	lw a1, 0(s4)      ; a1 := s (a1 is the register holding parameter for write)
	addi a0, zero, 1  ; a0 := 1
	addi a2, zero, 4  ; a2 := 4 (write 4 bytes)
	addi a7, zero, SCALL_WRITEINT  ; a7 := SCALL_WRITEINT
	scall  ; call write
	mv a0, s5         ; a0 := s5 (restore a0)
	lw ra, 12(sp)     ; restore ra from SP + parsize + 12
	lw s0, 8(sp)      ; restore s0/FP from SP + parsize + 8
	addi sp, sp, 16   ; SP := SP + n (release the frame)
	ret               ; PC := ra
 </pre>