# TinyFive


---
## Setup

In [None]:
# Fetch the TinyFive sources and make the fresh checkout the working directory.
!git clone https://github.com/OpenMachine-ai/tinyfive.git
%cd tinyfive
# Imported after the %cd above — presumably so Python resolves `tinyfive` from the checkout.
from tinyfive import tinyfive
import numpy as np

# `m` is the single machine instance reused by every example cell below.
m = tinyfive(mem_size=1000)  # instantiate RISC-V machine with 1KB of memory

Cloning into 'tinyfive'...
remote: Enumerating objects: 191, done.
remote: Counting objects: 100% (41/41), done.
remote: Compressing objects: 100% (29/29), done.
remote: Total 191 (delta 31), reused 21 (delta 12), pack-reused 150
Receiving objects: 100% (191/191), 133.52 KiB | 1.10 MiB/s, done.
Resolving deltas: 100% (116/116), done.
/content/tinyfive


---
## Example 1: Multiply two numbers

### Example 1.1: Use upper-case instructions (option A) with back-door loading of registers

In [None]:
# Back-door initialisation: poke the operands 6 and 7 directly into x[11] and x[12].
m.x[11], m.x[12] = 6, 7
# Multiply with the upper-case instruction method: x[10] := x[11] * x[12].
m.MUL(10, 11, 12)
print(m.x[10])  # show the product now held in x[10]

42


### Example 1.2: Same as example 1.1, but now load the data from memory
Specifically, the data values are stored at addresses 0 and 4. Here, each value is 32 bits wide (i.e. 4 bytes wide), which occupies 4 addresses in the byte-wide memory.

In [None]:
# Place the two 32-bit operands in memory: '6' at mem[0] and '7' at mem[4].
for value, addr in ((6, 0), (7, 4)):
  m.write_i32(value, addr)

# Load each operand into its register: x[11] from mem[0 + 0], x[12] from mem[4 + 0].
for reg, addr in ((11, 0), (12, 4)):
  m.LW(reg, addr, 0)

m.MUL(10, 11, 12)  # x[10] := x[11] * x[12]
print(m.x[10])

42


### Example 1.3: Same as example 1.2, but now use `asm()` and `exe()` (option B)
The assembler function `asm()` takes an instruction, converts it into machine code, and stores it in memory at address `m.pc`. Once the entire assembly program is written into memory `mem[]`, the `exe()` function (aka ISS, the instruction-set simulator) can then execute the machine code stored in memory.

In [None]:
# Data: operands '6' and '7' live at mem[0] and mem[4].
m.write_i32(6, 0)
m.write_i32(7, 4)

# Code: the program is kept as a table of instructions and assembled into
# mem[] one entry at a time, starting at byte address 4*20.
prog_start = 4*20
program = [
  ('lw',  11, 0,  0),   # load register x[11] from mem[0 + 0]
  ('lw',  12, 4,  0),   # load register x[12] from mem[4 + 0]
  ('mul', 10, 11, 12),  # x[10] := x[11] * x[12]
]
m.pc = prog_start
for instr in program:
  m.asm(*instr)

# Run from the program's first instruction and stop after all 3 have executed.
m.exe(start=prog_start, instructions=len(program))
print(m.x[10])

42


---
## Example 2: Add two vectors
We are using the following memory map for adding two 8-element vectors `res[] := a[] + b[]`, where each vector element is 32 bits wide (i.e. each element occupies 4 byte-addresses in memory).

|Byte address |	Contents |
|-------------|----------|
| 0 .. 4\*7	  | a-vector: a[0] is at address 0, a[7] is at address 4\*7 |
| 4\*8 .. 4\*15 |	b-vector: b[0] is at address 4\*8, b[7] is at address 4\*15 |
| 4\*16 .. 4\*23 | result-vector: res[0] is at address 4\*16, res[7] is at address 4\*23 |

### Example 2.1: Use upper-case instructions (option A) with Python for-loop

In [None]:
# Build the inputs: two random 8-element vectors (integers in 0..99) placed in memory.
a = np.random.randint(100, size=8)
b = np.random.randint(100, size=8)
m.write_i32_vec(a, 0)    # a[] starts at mem[0]
m.write_i32_vec(b, 4*8)  # b[] starts at mem[4*8]

# Element-wise add, one Python loop iteration per vector element.
for k in range(8):
  off = 4*k                   # byte offset of element k (4 bytes per element)
  m.LW (11, off,        0)    # x[11] := a[k], read from mem[off + 0]
  m.LW (12, off + 4*8,  0)    # x[12] := b[k], read from mem[off + 4*8 + 0]
  m.ADD(10, 11,         12)   # x[10] := x[11] + x[12]
  m.SW (10, off + 4*16, 0)    # res[k] := x[10], written to mem[off + 4*16 + 0]

# Check against NumPy: the difference must be zero in every position.
ref = a + b                    # golden reference computed outside the machine
res = m.read_i32_vec(8, 4*16)  # the 8 results, read back from address 4*16
print(res - ref)

[0 0 0 0 0 0 0 0]


### Example 2.2: Same as example 2.1, but now use asm() and exe() functions without branch instructions (option B)

In [None]:
# Build the inputs: two random 8-element vectors (integers in 0..99) placed in memory.
a = np.random.randint(100, size=8)
b = np.random.randint(100, size=8)
m.write_i32_vec(a, 0)    # a[] starts at mem[0]
m.write_i32_vec(b, 4*8)  # b[] starts at mem[4*8]

# Generate the fully unrolled program (no branches): 4 instructions per element.
code_base = 4*48
unrolled = []
for k in range(8):
  off = 4*k  # byte offset of element k
  unrolled += [
    ('lw',  11, off,        0),   # x[11] := a[k] from mem[off + 0]
    ('lw',  12, off + 4*8,  0),   # x[12] := b[k] from mem[off + 4*8 + 0]
    ('add', 10, 11,         12),  # x[10] := x[11] + x[12]
    ('sw',  10, off + 4*16, 0),   # res[k] := x[10], stored at mem[off + 4*16 + 0]
  ]

# Assemble the program into mem[] starting at address 4*48.
m.pc = code_base
for instr in unrolled:
  m.asm(*instr)

# Execute from address 4*48 and stop after all 8*4 instructions have run.
m.exe(start=code_base, instructions=len(unrolled))

# Check against NumPy: the difference must be zero in every position.
ref = a + b                    # golden reference computed outside the machine
res = m.read_i32_vec(8, 4*16)  # the 8 results, read back from address 4*16
print(res - ref)

[0 0 0 0 0 0 0 0]


### Example 2.3: Same as example 2.2, but now use asm() and exe() functions with branch instructions (option C)

In [None]:
# Build the inputs: two random 8-element vectors (integers in 0..99) placed in memory.
a = np.random.randint(100, size=8)
b = np.random.randint(100, size=8)
m.write_i32_vec(a, 0)    # a[] starts at mem[0]
m.write_i32_vec(b, 4*8)  # b[] starts at mem[4*8]

# Register roles inside the loop:
#   x[13] - byte offset of the current element, stepping 0, 4, .., 28
#   x[14] - loop bound 28+4 = 32; the loop exits once x[13] reaches it
prologue = [
  ('add',  13, 0, 0),        # x[13] := x[0] + x[0] = 0 (x[0] is always 0)
  ('addi', 14, 0, 32),       # x[14] := x[0] + 32 = 32
]
loop_body = [
  ('lw',   11, 0,    13),    # x[11] := a[] element at mem[0 + x[13]]
  ('lw',   12, 4*8,  13),    # x[12] := b[] element at mem[4*8 + x[13]]
  ('add',  10, 11,   12),    # x[10] := x[11] + x[12]
  ('sw',   10, 4*16, 13),    # mem[4*16 + x[13]] := x[10]
  ('addi', 13, 13,   4),     # advance x[13] to the next element (+4 bytes)
  ('bne',  13, 14, 'loop'),  # repeat from 'loop' while x[13] != x[14]
]

# Assemble at address 4*48, dropping a label in front of each section.
m.pc = 4*48
m.lbl('start')
for instr in prologue:
  m.asm(*instr)
m.lbl('loop')
for instr in loop_body:
  m.asm(*instr)
m.lbl('end')

# Run from label 'start' until execution reaches label 'end'.
m.exe(start='start', end='end')

# Check against NumPy: the difference must be zero in every position.
ref = a + b                    # golden reference computed outside the machine
res = m.read_i32_vec(8, 4*16)  # the 8 results, read back from address 4*16
print(res - ref)

# Print the program counter and all registers.
m.dump_state()

[0 0 0 0 0 0 0 0]
pc   :  224
x[ 0]:    0, x[ 1]:    0, x[ 2]:    0, x[ 3]:    0
x[ 4]:    0, x[ 5]:    0, x[ 6]:    0, x[ 7]:    0
x[ 8]:    0, x[ 9]:    0, x[10]:  122, x[11]:   23
x[12]:   99, x[13]:   32, x[14]:   32, x[15]:    0
x[16]:    0, x[17]:    0, x[18]:    0, x[19]:    0
x[20]:    0, x[21]:    0, x[22]:    0, x[23]:    0
x[24]:    0, x[25]:    0, x[26]:    0, x[27]:    0
x[28]:    0, x[29]:    0, x[30]:    0, x[31]:    0


---
## Run all examples from file `examples.py`

In [None]:
# Run the checkout's examples.py as a standalone script in a shell subprocess.
# NOTE(review): presumably it contains the same examples as the cells above — confirm.
!python3 examples.py

42
42
42
[0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0]
pc   :  224
x[ 0]:    0, x[ 1]:    0, x[ 2]:    0, x[ 3]:    0
x[ 4]:    0, x[ 5]:    0, x[ 6]:    0, x[ 7]:    0
x[ 8]:    0, x[ 9]:    0, x[10]:   34, x[11]:   27
x[12]:    7, x[13]:   32, x[14]:   32, x[15]:    0
x[16]:    0, x[17]:    0, x[18]:    0, x[19]:    0
x[20]:    0, x[21]:    0, x[22]:    0, x[23]:    0
x[24]:    0, x[25]:    0, x[26]:    0, x[27]:    0
x[28]:    0, x[29]:    0, x[30]:    0, x[31]:    0
