There are multiple choices of interpreters, each with their own advantages and disadvantages.
A piece of Forth code will look like the following, in memory:
digraph {
rankdir=TB;
node [shape="record"];
prog [label="<0> Forth\ninterpreter | <1> Address of Forth method | <2> Address of Native method | ... | <n> Exit"];
interp [label="<0> Push next_inst\nonto return stack | <1> Set next_inst\nfrom r0 | <n> Branch to `Next`"];
subrA [label="<0> Forth\ninterpreter | ... | <n> Exit"];
subrB [label="<0> Next\ncell | <1> Instruction | ... | <n> Branch to\n`Next`"];
subrExit [label="<0> Next\ncell | <1> Pop next_inst\nfrom return stack | <2> Branch to\n`Next`"];
next [label="<0> Load next_inst to r0 | Increment next_inst | Load interpreter at r0 | Interpret r0+4"];
prog:0 -> interp:0;
subrA:0 -> interp:0 [minlen=2];
interp:0:s -> next:0:n [color=white,minlen=2];
interp:n:s -> next:0:n [constraint=false];
prog:1 -> subrA:0:n [minlen=2];
prog:2 -> subrB:0:n [minlen=2];
prog:n:s -> subrExit:0;
subrA:n:s -> subrExit:0 [minlen=2];
subrB:0:n -> subrB:1:n;
subrExit:0:n -> subrExit:1:n;
subrB:n:s -> next:0:n [constraint=false];
subrExit:2:s -> next:0:n [constraint=false];
}
#+RESULTS[f99a82b51e953c459dbfdfa274f4577c67ddd2e6]:
Let’s first look at the leftmost block, a forth code block, composed of forth words, which may not be 32-bits (especially for systems with small memory, but it is in this case). The first forth word of a code block is always the interpreter, which here points to a native code block that executes the rest of the forth words.
The forth interpreter pushes the next_inst
onto the stack (like the
program counter, but for the interpreted words), loads the start of
the current code block from r0
into next_inst
, and begins
executing the current code block by jumping to next
, another native code block
Of note here is the interpreter, which is how we differentiate between
Forth and native methods. For Forth methods, it pushes the
next_instr
onto the return stack; for native methods it is just a
pointer to the beginning of the assembled native code.
The Forth interpreter keeps executing the interpreters of each call in the current method, including the exit call.
For code-blocks, we will be including both assembly code, and Forth code (not standard, but will work once we have got to section TODO).
Until section sec:forth-assembly (or saying it before the code block),
all the assembly code goes into file stage0-machine-arm.s
.
{{{cstart}}}
next_inst .req r11
data_space .req r11
rsp .req r12
.macro .execute reg=r0
ldr pc, [\reg], #4 /* FWSIZE */
.endm
.macro .next
b next
.endm
.macro .exit
ldr next_inst, [rsp], #4
b next
.endm
.macro .fw word:req, rest:vararg
.ifnc "\word","L"
.4byte \word /* FWSIZE */
.endif
.ifnb \rest ; .fw \rest ; .endif
.endm
.macro .forth_interpreter
.fw forth_interpreter
.endm
.macro .asm_interpreter
.fw 1f
1:
.endm
.macro .cell init=0
.word \init
.endm
forth_interpreter:
str next_inst, [rsp, #-4]!
mov next_inst, r0
/* b next */
next:
ldr r0, [next_inst], #4 /* FWSIZE */
.execute
{{{cmid}}}
:ASM FORTH-INTERPRETER
{ next_inst } return_stack PUSH
r0 INTO next_inst MOV
( or next_inst FROM r0 MOV )
LABEL: NEXT
next_inst INTO r0 2 LDRH+
r0 INTO r1 4 LDR+ ( vs +LDR and +LDR! )
r1 BX
:ASM EXIT
TODO
{{{cend}}}
{{{cstart}}}
next_inst .req r11
data_space .req r11
rsp .req r12
.macro .execute reg=r0
bx \reg
.endm
.macro .next
ldr pc, [next_inst], #4 /* FWSIZE */
.endm
.macro .exit
ldr next_inst, [rsp], #4
.next
.endm
.macro .fw word:req, rest:vararg
.ifnc "\word","L"
.4byte \word /* FWSIZE */
.endif
.ifnb \rest ; .fw \rest ; .endif
.endm
.macro .forth_interpreter
bl forth_interpreter
.endm
.macro .asm_interpreter
.endm
.macro .cell init=0
.word \init
.endm
forth_interpreter:
str next_inst, [rsp, #-4]!
mov next_inst, lr
.next
{{{cmid}}} {{{cend}}}
Anything that calls a subroutine should save LR {{{cstart}}}
next_inst .req lr
rsp .req r12
.macro .execute reg=r0
bx \reg
.endm
.macro .next
bx lr
.endm
.macro .exit
// This is rsp+4 because EXIT (implemented as asm) doesn't
// push its LR to RSP
ldr pc, [rsp], #4
.endm
.macro .fw word:req, rest:vararg
.ifc "\word","L"
.cellw \rest
.else
bl \word /* FWSIZE */
.ifnb \rest ; .fw \rest ; .endif
.endif
.endm
.macro .cellw n:req, rest:vararg
.cell \n
.ifnb \rest ; .fw \rest ; .endif
.endm
.macro .forth_interpreter
/*push {lr}
mov r0, #'\t'
bl uart_putc
mov r0, #'-'
rsb r2, rsp, #0x4000
1: bl uart_putc
subs r2, r2, #1
bcs 1b
mov r0, #' '
bl uart_putc
mov r0, pc
bl puthex
pop {lr}*/
str lr, [rsp, #-4]!
.endm
.macro .asm_interpreter
/*push {lr}
mov r0, #'\t'
bl uart_putc
mov r0, #'-'
rsb r2, rsp, #0x4000
1: bl uart_putc
subs r2, r2, #1
bcs 1b
mov r0, #' '
bl uart_putc
mov r0, pc
bl puthex
pop {lr}*/
.endm
.macro .cell init=0
.word \init
.endm
{{{cmid}}}
:ASM FORTH-INTERPRETER
{ next_inst } return_stack PUSH
r0 INTO next_inst MOV
( or next_inst FROM r0 MOV )
LABEL: NEXT
next_inst INTO r0 2 LDRH+
r0 INTO r1 4 LDR+ ( vs +LDR and +LDR! )
r1 BX
:ASM EXIT
TODO
{{{cend}}}
tohex:
cmp r0, #10
addge r0, #'A'-10
addlt r0, #'0'
bx lr
puthex:
push {r0-r4,lr}
ror r2, r0, #28 /* 01 23 45 67 */
mov r0, #'0' ; bl uart_putc
mov r0, #'x' ; bl uart_putc
mov r3, #15
mov r4, #8
puthex_loop:
and r0, r2, r3 ; bl tohex ; bl uart_putc
ror r2, #28
subs r4, #1
bne puthex_loop
puthex_end:
mov r0, #'\n' ; bl uart_putc
pop {r0-r4,pc}
The exit call pops the previously saved next_instr
, then continuing
executing from there on by jumping to next
.
TODO: Have &ERR as the first thing on the return stack, so that when we pop off one too many, it will be detected.
Next, we need to add the basic words (words being procedures, methods, functions, or operators) of Forth, which we will need to implement natively.
But before we implement those words, we need to make them findable by the Forth system, for which we have to discuss the simple Forth dictionary. The dictionary is a simple linked-list containing the flags, name of the word, and the interpreter along with the code, as discussed above.
digraph {
rankdir=LR;
node [shape="record"];
DROP_XT [label="Drop XT", shape=none];
DUP_XT [label="Dup XT", shape=none];
DROP [label="<0> Next entry | Flags |Counted string\n\"DROP\\0\" | Padding | <xt> Interpreter | Code | ..."];
DUP [label="<0> Next entry | Flags | Counted string\n\"DUP\\0\" | <xt> Interpreter | Code | ..."];
LAST_ENTRY [label="End"];
LATEST -> DROP:0
DROP_XT -> DROP:xt
DUP_XT -> DUP:xt
DROP:0 -> DUP:0
DUP:0 -> LAST_ENTRY
}
#+RESULTS[414409f65d09462df2bc5443a9c55f3713246576]:
Here a counted string means that the first element of the string is a word (four bytes) containing the length of the string (in bytes), which is followed by the bytes of the string, including a trailing NULL byte, and padded to Forth-word boundary.
To help with making dictionary entries, we will need the following macro, which creates the header for a dictionary entry. This includes the next entry pointer, its flags, name, and finally an assembler label to use with hand-written word implementations. But it doesn’t include the interpreter, so it can be used to create both native and Forth words.
{{{cstart}}}
.set previous_entry, 0
.macro .entry name:req, label, imm=0, hid=0
.balign 4 /* Align to power of 2 */
1:.cell previous_entry ; .set previous_entry, 1b
.byte \hid, \imm ; .balign 4
.cell 2f-3f ; 3:.ascii "\name" ; 2: .byte 0
.balign 4 /* Align to power of 2 */
.ifc _,\label
.globl \name ; \name :
.else
.globl \label ; \label :
.endif
.endm
{{{cmid}}}
: CREATE ( "<spaces>name" -- )
align
here latest @ , latest !
0 C, 0 C, align \ flags
here cell-size allot
bl word' \ addr start len
nip 2dup swap ! \ addr len
nip 1 + allot
align ; \ padding
{{{cend}}}
The first dictionary entry is also the simplest. Remember that the `entry’ macro doesn’t include the interpreter, so this just points to the native code for exit, which pops the forth return stack and continues executing from there.
.entry EXIT, _
.asm_interpreter
.exit
To help define the rest of the machine-dependent words quicker, we need a couple of macros first.
{{{cstart}}}
.macro .insts i, insts:vararg
\i ; .ifnb \insts ; .insts \insts ; .endif
.endm
.macro .fasm1 name:req, label, pop, i:vararg
.entry \name, \label
.asm_interpreter
.ifnc _,\pop ; pop {\pop} ; .endif
.insts \i
.endm
.macro .fasm name:req, label, pop, push, i:vararg
.fasm1 \name, \label, \pop, \i
.ifnc _,\push ; push {\push} ; .endif
.next
.endm
.macro .binops name:req, label, op:req, rest:vararg
.fasm \name, \label, r0-r1, r1, "\op r1, r0"
.ifnb \rest ; .binops \rest ; .endif
.endm
.macro .binrels name:req, label, rel:req, rest:vararg
.fasm1 \name, \label, r0-r1, "cmp r1, r0"
mov r0, #0 ; mov\rel r0, #-1
push {r0} ; .next
.ifnb \rest ; .binrels \rest ; .endif
.endm
{{{cmid}}}
TODO
{{{cend}}}
We are now ready to define the basic Forth words in assembly, on top
of which we will build the rest of the Forth system. The EXIT
we
have already defined above.
{{{cstart}}}
.binops "+", ADD, add, "-", SUB, sub, "*", STAR, mul
.binops "LSHIFT", _, lsl, "RSHIFT", _, lsr
.binops "&", AND, and, "|", OR, orr, "XOR", _, eor
.binrels "<>", NOT_EQUAL, ne, "U<", U_LESS_THAN, lo
.binrels "\x3d", EQUAL, eq, "U>", U_GREATER_THAN, hi
.binrels "<", LESS_THAN, lt, ">", GREATER_THAN, gt
.fasm "NEGATE", _, r0, r0, "rsb r0, #0"
.fasm "INVERT", _, r0, r0, "mvn r0, r0"
.fasm "C\x40", C_FETCH, r0, r0, "ldrB r0, [r0]"
.fasm "\x40", FETCH, r0, r0, "ldr r0, [r0]" /* FWSIZE */
.fasm "C!", C_STORE, r0-r1, _, "strB r1, [r0]"
.fasm "!", STORE, r0-r1, _, "str r1, [r0]" /* FWSIZE */
{{{cmid}}}
TODO
{{{cend}}}
TODO: Separate vs contiguous data-space (in case icache and dcache coherency extends to reads, not just writes)
{{{cstart}}}
// TODO: SUBROUTINE .fasm1 "(BRANCH)", BRANCH, _, "ldr r0, [data_space]"
// TODO: SUBROUTINE add next_inst, r0 ; .next /* FWSIZE */
// TODO: SUBROUTINE .fasm1 "(?BRANCH)", ZBRANCH, r1, "ldr r0, [data_space]"
// TODO: SUBROUTINE cmp r1, #0 ; addeq next_inst, r0 ; addne next_inst, #4
// TODO: SUBROUTINE .next /* FWSIZE */
// TODO: SUBROUTINE .fasm "[\x27]", LIT, _, r0, "ldr r0, [data_space], #4" /* FWSIZE */
.fasm1 "(BRANCH)",BRANCH, _, "ldr r0, [next_inst]"
add next_inst, next_inst, r0 /* FWSIZE */
.next
.fasm1 "(?BRANCH)", ZBRANCH, r1, "ldr r0, [next_inst]"
cmp r1, #0
addeq next_inst, next_inst, r0
addne next_inst, #4
.next /* FWSIZE */
.fasm "[\x27]", LIT, _, r0, "ldr r0, [next_inst], #4" /* FWSIZE */
.macro BRANCH, pos
b .+\pos
.endm
{{{cmid}}}
TODO
{{{cend}}}
{{{cstart}}}
.fasm "CELL-SIZE", CELL_SIZE, _, r0, "mov r0, #4" /* CELLSIZE */
.fasm "CHAR-SIZE", CHAR_SIZE, _, r0, "mov r0, #1" /* CHARSIZE */
.fasm "NIP", _, r0-r1, r0
.fasm "DROP", _, _, _, "add sp, #4" /* CELLSIZE */
.fasm "DUP", _, _, r0, "ldr r0, [sp]"
.fasm "OVER", _, _, r0, "ldr r0, [sp, #4]" /* CELLSIZE */
.fasm "PICK", _, r0, r0, "ldr r0, [sp, r0, LSL #2]" /* CELLSIZE */
.fasm "ROT", _, r0-r2, r2, "push {r0-r1}"
.fasm "SWAP", _, r0-r1, r1,"push {r0}"
.fasm "R\x40", R_FETCH, _, r0, "ldr r0, [rsp]" /* FWSIZE */
.fasm "R>", R_FROM, _, r0, "ldr r0, [rsp], #4" /* FWSIZE */
.fasm ">R", TO_R, r0, _, "str r0, [rsp, #-4]!" /* FWSIZE */
.fasm "DEPTH", _, _, r0, "rsb r0, sp, #0x8000", "lsr r0, #2" /* FWSIZE */
{{{cmid}}}
TODO
{{{cend}}}
There are also a couple of variables we need, this goes into a
different file (vars.s
), so that the previous_entry
points to the
latest defined Forth word.
{{{cstart}}}
.data
.balign 4
HERE_LOC: .4byte DATA_END
.globl previous_entry
LATEST_LOC: .4byte previous_entry
STATE_LOC: .4byte 0
{{{cmid}}}
TODO
{{{cend}}}
We also need to implement functions for input/output.
{{{cstart}}}
.fasm "EMIT", _, r0, _, "push {lr}","bl uart_putc", "pop {lr}"
.fasm "KEY", _, _, r0, "push {lr}", "bl uart_getc", "bl uart_putc", "pop {lr}"
.fasm "HEX.", HEX_PRINT, r0, _, "push {lr}","b puthex", "pop {lr}"
{{{cmid}}}
TODO
{{{cend}}}
{{{cstart}}}
/* TODO: Use this more liberally */
.macro .fdef1 name:req, label, imm, hidden, rest:vararg
.entry \name, \label, \imm, \hidden
.forth_interpreter
.ifnb \rest ; .fw \rest ; .endif
.endm
.macro .fdef name:req, label, rest:vararg
.fdef1 \name, \label, 0, 0, \rest
.endm
{{{cmid}}} {{{cend}}}
.fdef "1-", DECR, LIT, L,1, SUB, EXIT
.fdef "1+", INCR, LIT, L,1, ADD, EXIT
.fdef "2DUP", TWO_DUP, OVER, OVER, EXIT
.fdef "2DROP", TWO_DROP, DROP, DROP, EXIT
.fdef "-ROT", NROT, ROT, ROT, EXIT
.fdef "2>R", TWO_TO_R, R_FROM, NROT, SWAP
.fw TO_R, TO_R, TO_R, EXIT
.fdef "2R>", TWO_R_FROM, R_FROM, R_FROM
.fw R_FROM, ROT, TO_R, SWAP, EXIT
.fdef "2RDROP", TWO_R_DROP, R_FROM, R_FROM
.fw R_FROM, TWO_DROP, TO_R, EXIT
.fdef "2R\x40", TWO_R_FETCH, R_FROM
.fw TWO_R_FROM, TWO_DUP, TWO_TO_R, ROT
.fw TO_R, EXIT
.fdef "TRUE", _, LIT, L,-1, EXIT
.fdef "FALSE", _, LIT, L,0, EXIT
.fdef "HERE_VAR", _, LIT, L,HERE_LOC, EXIT
.fdef "LATEST", _, LIT, L,LATEST_LOC, EXIT
.fdef "STATE", _, LIT, L,STATE_LOC, EXIT
.fdef "HERE", _, HERE_VAR, FETCH, EXIT
.fdef "CHAR+", CHAR_ADD, CHAR_SIZE, ADD, EXIT
.fdef "CELL+", CELL_ADD, CELL_SIZE, ADD, EXIT
.fdef "CHARS", _, CHAR_SIZE, STAR, EXIT
.fdef "CELLS", _, CELL_SIZE, STAR, EXIT
.fdef "C\x2c", C_COMMA, HERE, C_STORE, HERE
.fw CHAR_ADD, HERE_VAR, STORE, EXIT
.fdef "\x2c", COMMA, HERE, STORE, HERE
.fw CELL_ADD, HERE_VAR, STORE, EXIT
{{{cstart}}}
.fdef "ALLOT", _
.fw HERE, ADD, HERE_VAR, STORE, EXIT
{{{cmid}}}
: ALLOT HERE + HERE_VAR ! ;
{{{cend}}}
{{{cstart}}}
.fdef "ALIGN", _
.fw HERE, CELL_SIZE, DECR, ADD
.fw CELL_SIZE, DECR, INVERT, AND
.fw HERE_VAR, STORE, EXIT
{{{cmid}}}
: ALIGN
HERE cell-size 1- + cell-size 1- invert and
HERE_VAR ! ;
{{{cend}}}
{{{cstart}}}
.fdef "CREATE", _
.fw ALIGN
.fw HERE, LATEST, FETCH
.fw COMMA, LATEST, STORE
.fw LIT, L,0, C_COMMA
.fw LIT, L,0, C_COMMA, ALIGN
.fw BL, WORD
.fw CELL_SIZE, ALLOT
.fw FETCH, CHARS, ALLOT
.fw LIT, L,0, C_COMMA
.fw EXIT
{{{cmid}}}
: CREATE ( "<spaces>name" -- )
align
here latest @ , latest !
0 C, 0 C, align \ flags
bl word \ c-str
cell-size allot \ allot space for len
@ chars allot \ allot len characters
0 C, \ For C string compatibility
align ; \ padding
{{{cend}}}
{{{cstart}}}
.fdef "BALIGN", BALIGN, DECR, SWAP, OVER
.fw ADD, SWAP, INVERT, AND, EXIT
.fdef "ENTRY-NEXT", ENTRY_NEXT, EXIT
.fdef "ENTRY-FLAGS", ENTRY_FLAGS, CELL_ADD, EXIT
.fdef "ENTRY-LEN", ENTRY_LEN, LIT, L,2
.fw CELLS, ADD, EXIT
.fdef "ENTRY-CHARS", ENTRY_CHARS, LIT, L,3
.fw CELLS, ADD, EXIT
.fdef "ENTRY-XT", ENTRY_XT, DUP
.fw ENTRY_LEN, FETCH, LIT, L,1, ADD, SWAP
.fw ENTRY_CHARS, ADD, LIT, L,4, BALIGN, EXIT
{{{cmid}}}
TODO
{{{cend}}}
{{{cstart}}}
.fdef "HIDDEN?", HIDDENP
.fw ENTRY_FLAGS, C_FETCH, EXIT
{{{cmid}}}
: HIDDEN? entry-flags C@ ;
{{{cend}}}
{{{cstart}}}
.fdef "IMMEDIATE?", IMMEDIATEP
.fw ENTRY_FLAGS, CHAR_ADD, C_FETCH, EXIT
{{{cmid}}}
: IMMEDIATE? ( xt -- -1|0 )
entry-flags char+ C@ ;
{{{cend}}}
Toggles hidden status of a given xt
{{{cstart}}}
.fdef "HIDE", _, CELL_ADD, DUP, C_FETCH
.fw INVERT, SWAP, C_STORE, EXIT
{{{cmid}}}
: HIDE ( xt -- )
cell+ dup C@
invert swap C! ;
{{{cend}}}
{{{cstart}}}
.fdef1 "IMMEDIATE", _, -1 /* immediate */
.fw LATEST, FETCH
.fw TRUE, SWAP, CELL_ADD, CHAR_ADD, C_STORE, EXIT
{{{cmid}}}
: IMMEDIATE ( -- )
LATEST @
true swap cell+ char+ C!
{{{cend}}}
- TODO
- Explain “c-addr u” and fwsize
{{{cstart}}}
.fdef "FIND\x27", FIND_NEW
.fw LATEST, FETCH
FIND_LOOP: /* ( c-addr u entry ) */
.fw DUP, LIT, L,0, EQUAL, ZBRANCH, L,(FIND_NON_END-.)
.fw DROP, DROP, LIT, L,0, EXIT
FIND_NON_END:
.fw DUP, HIDDENP, INVERT
.fw ZBRANCH, L,(FIND_NEXT_ENTRY-.)
.fw TWO_DUP, ENTRY_LEN, FETCH, EQUAL
.fw ZBRANCH, L,(FIND_NEXT_ENTRY-.)
/* c-addr u entry */
.fw TWO_DUP, ENTRY_CHARS
.fw LIT, L,4, PICK
/* c-addr u entry u entry-str c-addr */
.fw MEMCMP, ZBRANCH, L,(FIND_NEXT_ENTRY-.)
.fw NIP, NIP
.fw DUP, ENTRY_XT
.fw SWAP, IMMEDIATEP
.fw ZBRANCH, L,(NON_IMM-.), LIT, L,1, BRANCH, L,(IMM_END-.)
NON_IMM:
.fw LIT, L,-1
IMM_END:
.fw EXIT
FIND_NEXT_ENTRY:
.fw FETCH
.fw BRANCH, L,(FIND_LOOP-.)
{{{cmid}}}
: FIND' ( c-addr u -- c-addr 0 | xt 1 | xt -1 )
latest @
begin \ c-addr u entry
dup 0 = if drop drop 0 exit then
dup hidden? invert if
2dup entry-len = if \ c-addr u entry entry-len u
2dup entry-chars 4 pick
\ c-addr u entry u entry-str c-addr
memcmp if \ c-addr u entry
nip nip \ entry
dup entry-xt
swap immediate? if 1 else -1 then
exit
then
then
then
@ \ Fetch next entry
again ;
{{{cend}}}
We also need to write the memory comparison, as well as the utilities for the flags.
{{{cstart}}}
.fdef "MEMCMP", _
.fw ROT, LIT, L,0
.fw TWO_TO_R
MEMCMP_LOOP:
.fw TWO_DUP, R_FETCH, ADD, C_FETCH
.fw SWAP, R_FETCH, ADD, C_FETCH
.fw CHAR_EQUAL, INVERT, ZBRANCH, L,(MEMCMP_NEXT-.)
.fw TWO_R_DROP, TWO_DROP, FALSE, EXIT
MEMCMP_NEXT:
.fw R_FROM, LIT, L,1, ADD, TO_R
.fw TWO_R_FETCH, EQUAL
.fw ZBRANCH, L,(MEMCMP_LOOP-.)
.fw TWO_R_DROP
.fw TWO_DROP, TRUE, EXIT
{{{cmid}}}
: MEMCMP ( len a b -- true | false )
rot 0 do
2dup i + C@ swap i + C@
= invert if unloop 2drop false exit then
loop
2drop true ;
{{{cend}}}
{{{cstart}}}
// TODO: This is indirect at the moment
.fasm1 "EXECUTE", EXECUTE, r0
.execute
{{{cmid}}}
:ASM EXECUTE-INTERPRETER
{ r0 } value_stack POP
r1 r0 4 LDR+ \ TODO
r0 BX ;
{{{cend}}}
SOURCE-ID