/
yoda
executable file
·2890 lines (2391 loc) · 108 KB
/
yoda
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/bin/bash
# shellcheck disable=SC2016 disable=SC2034 disable=SC2154 disable=SC2288
v="0.8.4"
# ----- about -------------------------------------------- #fold00
# times in UTC
# dates in post-gregorian Sol3
# numbers in decimal. Mostly.
# changelog now in directory changes. Or, type "changes" in yoda
# for changes to current version.
# yoda is an acronym of Yet Onather Delirious Application, and does
# naturally not refer to any greenish midget with pointy ears, albeit
# it had been said that reading yoda source aloud sounds like the very
# same midget talking.
# yoda implements a compiler which converts a close resemblance of Forth
# source code to bash functions, an interpreter which runs them, and
# a base vocabulary of useful functions, strongly orientated towards
# what a common Forth interpreter offers.
# What learned you have, unlearn you must!
# editor used is efte, folds in source files are therefore efte compatible.
# ----- shellcheck ignores ------------------------------- #fold00
# invoke shellcheck with: shellcheck -x yoda.
# SC2016: "expressions don't expand between single quotes"
# right, that's the reason for using single quotes there.
# SC2034 "... appears unused. Verify use (or export if used externally)"
# in most cases is "appears" the correct term.
# SC2154 "(warning): base is referenced but not assigned."
# is assigned, but shellcheck can't see how
# SC2288 This is interpreted as a command name ending with ','.
# That's how it's meant to be done.
# ----- declarations ------------------------------------- #fold00
shopt -s extglob                                # the line parser relies on extended glob patterns
shopt -u nullglob                               # unmatched globs stay literal
config="yoda.conf"                              # name of configuration file which may override any of the below settings
declare -i break=2                              # break will return (0), exit (1) or execute coldvector (2)
declare -i clean=1                              # empty stack on error/ctrl-c
# Resolve the location of this script. The path is taken as a whole from
# command substitution: splitting it with "read" into words would cut it
# off at the first blank and yield a wrong $mydir.
tmp="$(realpath "$0")"
read -ra version <<< "${v//./ }"                # version string split at its dots
mydir="${tmp%/*}"                               # directory this script lives in
doc="$mydir/doc"                                # documentation base directory
worddoc_prefix="word"                           # used as part of file names, prevents files to go hidden by leading period
worddoc="$doc/words/$worddoc_prefix"            # directory of words documentation, also first part of word doc file name
dependencies="$mydir/dependencies"              # dependency file, path and name
# only first added directory - and its subdirectories -
# contain forward referencable libs.
libdirs=()                                      # array of possible library paths
for dir in \
  "$mydir/lib" \
  "$mydir" \
  "/usr/local/lib/yoda"
do                                              # loop through directory candidates
  [[ -d "$dir" ]] && libdirs+=("$dir")          # add directory if existing
done
# Core interpreter state. Everything below is global and shared by the
# compiler, the outer interpreter and the generated functions.
nameprefix="yoda" # prefix to bash function names, _${xt} will be appended upon header creation
declare -i firstxt="1000" # names enumeration, initially. Can be any positive integer
declare -i xt=firstxt # names enumeration, incremented, starting with xt
# s is the data stack indexed by sp, m the cell memory indexed by dp.
# r/rp presumably form the return stack - TODO confirm against users of r.
declare -ai r=() s=(0) m=()
declare -a strings=() # string memory
declare -i sp=0 rp=0 dp=0 sdp=0
declare -a ss=() # string patterns append their text here
declare -i s0 s1 s2 s3 # temp variables to hold stack items
declare -a raw target # raw: code gathered while compiling, target: code after optimise
declare -A doescode # items associated with defining word. Detokeniser extracted code
# naming:
# wordlist: name of a wordlist
# wordlistwid: wid of a wordlist
# convert wid to wordlist: $@ ( ${strings[wid]} )
declare -a wordlists # list of wordlists
declare -a builtin_wordlists=("only" "forth" "compiler" "unresolved")
# Create the builtin wordlists. wids are handed out from sdp, which starts
# at 0, so: only=0, forth=1, compiler=2, unresolved=3.
for wordlist in "${builtin_wordlists[@]}"; do
declare -Ai "flags_$sdp" # header flags per vocabulary
declare -A "wordlist_$sdp" # this is the name of the wordlist array, named "wordlist_$wid"
declare -n "$wordlist"="wordlist_$sdp" # allow referencing builtins by name without indirection ( ${unresolved[...]} )
((${wordlist}wid=sdp)) # builtins can also be refered by wid, assigned here
wordlists+=("$sdp")
strings[sdp++]="$wordlist" # finally associate wid with name.
done
declare -ai order=("$forthwid" "$onlywid") # only forth
declare -i currentwid="${order[0]}" # definitions
declare -n context="wordlist_${order[0]}" # set pointer variables to array name of wordlists
declare -n current="wordlist_$currentwid"
declare -n flags="flags_$currentwid" # pointer var to vocabulary specific header flags
declare -a body # xt -> body for create
declare -a names # xt -> name
declare -a transients # transient headers.
declare -A where # source location file:line
declare -A lib # dependencies cache
declare -i filehandle=1 # 1=yoda. 0=tty. >1=loaded source
declare -a files=("tty" "$(realpath "$0")") # list of included files
declare -a loaded # list of inclusion counters for included files
declare -a conclude=() # compiler words can add actions to array conclude, which
# will be executed when semicolon executes
declare -a lasterror # file, line number and row of the most recent error, as stored by error
lasterror=("" "-1" "0")
# header flags:
inline=1                 # inlined words compile code rather than function call.
protected=2              # protect code body against trashing when set.
# detokeniser flags:
flagscount="0"           # number of detokeniser flags handed out so far
# allocate the next free detokeniser flag bit.
# arg1: flag name. The variable of that name receives the bit value,
#       flagname[value] maps the value back to the name.
newflag() {
  printf -v "$1" '%d' "$(( 1 << flagscount++ ))"   # name -> value
  flagname[${!1}]="$1"                             # value -> name
}
newflag functionheader   # create header
newflag sourcelocation   # store source location
newflag does             # rip code apart into define- and run time portions
# what to execute when no command line args given
default='from defaults'  # load defaults from file
# number of bits per cell.
# one address increment is always a cell, this setting
# won't change that.
declare -i bits
# Can't use 64 bit - bash doesn't know unsigned, and in the
# rare cases that carry needs detection, bit msb<<1 is used.
bits=63 # 63 bit is best I can do for now.
#bits=62
#bits=48 # 0...281,474,976,710,655
#bits=32 # 0...4,294,967,295
#bits=21 # covers 6 decimal digits signed numeric space
#bits=20 # 20 bits, unusual but not impractical. 8086 address space
#bits=16 # 16 bits, a tad retro
#bits=11 # covers 3 decimal digits signed numeric space
#bits=8 # 8 bits are nice for testing
# Reject cell widths outside 8...63. Both bounds are tested with "or":
# a value can never be below 8 and above 63 at the same time.
if (( bits < 8 || bits > 63 )); then
  echo "illegal cell width: $bits bits" >&2
  exit 1
fi
declare -i msb="$((1<<(bits-1)))"               # most significant bit of a cell
declare -i carry="$((msb*2))"                   # bit just above a cell, used for carry detection
declare -i maxint="msb-1"                       # largest signed number
declare -i maxuint="maxint|msb"                 # all cell bits set
declare -i true="maxuint"
declare -i false=0
declare -i compiling="false"    # state flag: interpreting or compiling
declare -l lastword=""          # most recently created word, set by defining words
line=""                         # source line being worked on. modified during parsing and processing
word=""                         # bl delimited string from line currently being processed
# xeditor runs detached from yoda process and doesn't block input
# therefore an xeditor should open its own window
editor="vi"
#xeditor="efte"
# editor(): used with doc, edit. A blocking editor runs in the foreground.
if [[ -n "$editor" ]]; then
  editor() { $editor "$@"; }
fi
# a configured xeditor takes precedence and is started in the background.
if [[ -n "$xeditor" ]]; then
  editor() { $xeditor "$@" & }
  editor="$xeditor"
fi
# dump the page passed as arg1 as text
webview() {
  lynx -dump "$1"
}
declare -i magic=100000
# advance magic by a random amount of at least 10000
remagic() {
  magic+=$(( RANDOM + 10000 ))
}
remagic
declare -Ai bases=(['%']=2 ['#']=10 ['$']=16)   # number prefix -> radix
declare -Ai asc=() # ascii table
declare -a char=() # character table
# ascii->char LUT for emit, pack$, char$.
# Each character is produced by printf's %b from a three digit octal
# escape, written straight into the table with -v. This avoids the two
# subshells per entry a command substitution would need.
for i in {1..255}; do
  printf -v 'char[i]' '%b' "\\0$(( i >> 6 ))$(( (i >> 3) & 7 ))$(( i & 7 ))"
  if (( i == 10 )); then
    char[i]="" # linefeed entry stays empty and gets no asc entry: invalid hash key
  else
    asc["${char[i]}"]="$i"
  fi
done
# ----- diagnostics ------------------------------------- #fold00
# print a diagnostic line: a state marker (colon while the lowest bit of
# compiling is set, blank otherwise), the name of the calling function
# in brackets, then the message.
# arg1+: message
trace() {
  local marker="${char[32]}"                      # bl
  (( compiling & 1 )) && marker="${char[58]}"     # :
  printf "### %-1s %-20s %s\n" "$marker" "[${FUNCNAME[1]}]" "$@"
}
# placeholder for use in unfinished code: reports what the calling
# function would do.
# arg1+: description of the pretended action
pretend() {
  printf '%s\n' "${FUNCNAME[1]} pretends to $*"
}
# ----- headers ------------------------------------------ #fold00
# or the flag bits passed as arg1 into the header flags of $lastword.
# The obvious form
#   (( flags["$lastword"] |= "$1" ))
# can't be used: a bash glitch produces errors with some array keys
# like "'". Reading the old value first and assigning the result as a
# string has no problem with those.
setheaderflag() {
  local current_bits="${flags["$lastword"]}"
  flags["$lastword"]="$(( $current_bits | $1 ))"
}
# mark the most recently defined word as inline
inline() {
  setheaderflag "$inline"
}
# mark the most recently defined word as protected
protect() {
  setheaderflag "$protected"
}
# drop a word from the current wordlist: both its header entry and
# its header flags are removed.
# arg1: word
removeheaderfromcurrent() {
  local table
  for table in current flags; do
    # subscript stays unexpanded here, unset expands "$1" itself
    unset -v "${table}"'["$1"]'
  done
}
# duplicate the header of the last word, header flags included, in
# another wordlist. The header is read from the current vocabulary,
# where it is assumed to have just been created.
# arg1: destination wid
copytowordlist() {
  # pointers to the destination wordlist and its flags
  local -n dest="wordlist_$1" destflags="flags_$1"
  destflags["$lastword"]="${flags[$lastword]}"   # header flags
  dest["$lastword"]="${current[$lastword]}"      # function name
}
# transfer the header of the last word, header flags included, to
# another wordlist: copy it there, then remove it from the current
# vocabulary, where it is assumed to have just been created.
# arg1: destination wid
movetowordlist() {
  local destination="$1"
  copytowordlist "$destination"
  removeheaderfromcurrent "$lastword"
}
# move last header to compiler context vocabulary
compiled() {
  movetowordlist "$compilerwid"
}
# copy last header to compiler context vocabulary, keeping the original
immediate() {
  copytowordlist "$compilerwid"
}
# enter a word into the current wordlist. Neither compiles anything
# nor generates the function. An already existing entry of the same
# name is reported and then replaced.
# arg1: word name
# arg2: function name
# NOTE: leave it to header to prepend $nameprefix.
header() {
  lastword="$1"
  if [[ -n "${current["$lastword"]}" ]]; then
    echo "redefining $lastword"
  fi
  current["$lastword"]="$2"     # associate word with name
  flags["$lastword"]="0"        # default to no flags
}
# ----- detokeniser / pretend optimiser ------------------ #fold00
# currently only a detokeniser. Plugged preparing further use.
# The way it works:
# when a word gets compiled to a function, it will first be
# cached in an array "raw". semicolon then calls compile,
# which generates a bash function from raw.
# first thing compile does is calling optimise, which visits
# all items in raw, potentially modifies or substitutes them
# while transferring to an array named target.
# compile then creates the bash function from target, which
# at that point is expected to contain literal code only.
# Compiling words which add to raw array can tag entries,
# informing optimiser/detokeniser about their nature, so it
# can respond in an appropriate way to raw array entries.
# build the function name belonging to an xt
# arg1: xt
# result: variable name holds the function name
name() {
  printf -v name '%s_%s' "$nameprefix" "$1"
}
# hand out a fresh function name and consume one xt
# arg1: word
# result: variable name holds the function name, xt is advanced by one
newname() {
  names[xt]="$1"                  # keep word name for .name, name$
  name="${nameprefix}_${xt}"      # function name is built from the xt just used
  xt=$(( xt + 1 ))
}
# instruction stream injector: append one entry to raw, and its tag
# to analyser.
# arg1: code token
# arg2: code token type (opt), defaults to 0 meaning literal code
code() {
  local token="$1" tag="${2:-0}"
  raw+=("$token")
  analyser+=("$tag")
}
# inject a source location entry "filehandle:line" into the
# instruction stream, tagged as source location.
# arg1: file handle
# arg2: line number
where() {
  local location="$1:$2"
  code "$location" "$sourcelocation"
}
declare -i dodoes_at
# return values of all these analyser_${flagname} mean:
# 0: append raw or generated code to target
# 1: continue optimiser loop without adding code to target

# create the header for the word named in $rawline.
# If the word was forward referenced, the function name handed out for
# the forward reference is reused and the reference is removed from the
# unresolved wordlist. Otherwise a new name is invented.
# returns 1: a header entry adds no code to target.
analyser_functionheader() {
  word="$rawline"                         # word name of new word
  if [[ -n "${unresolved[$word]}" ]]; then
    name="${unresolved[$word]}"           # if forward ref, reuse function name of forward ref
    # going to resolve word, remove from forward refs. The subscript is
    # passed unexpanded so unset expands it itself; that also works for
    # words containing a single quote.
    unset -v 'unresolved["$word"]'
  else
    newname "$word"                       # not a forward ref: invent new name
  fi
  header "$word" "$name"                  # create new header
  return 1
}
# remember the source location held in $rawline for $lastword.
# returns 1: a location entry adds no code to target.
analyser_sourcelocation() {
  where+=(["$lastword"]="$rawline")
  return 1
}
# note where generated code has to be split for does>: one past the
# current end of target, i.e. just behind the entry which is appended
# for this raw line.
# returns 0: the raw line itself is added to target.
analyser_does() {
  (( dodoes_at = ${#target[@]} + 1 ))
}
# reset the compiler: gathered raw code, generated target code, the
# code tags for the analyser and the actions queued for semicolon
# are all discarded.
compiler_init() {
  raw=() target=() analyser=() conclude=()
}
# doesn't actually do any optimizing.
# Function is limited to:
# - generating code for pushing
# - generating code for function calls from xts
# - dealing with does>
# - delayed headers creation
# - storing source location
#
# Walks raw[@] in step with analyser[@] and rebuilds target[@] from it.
# An entry tagged 0 is literal code and copied as is. For any other tag
# the function analyser_<flagname> is called with the entry in $rawline
# and a default result in $targetline; when it returns non-zero, nothing
# is added to target for that entry.
# The loop index is the global scratch variable tmp.
# NOTE(review): analysers must therefore leave tmp alone - confirm when adding one.
optimise() { # run code generator on ${analyser[@]} data
((${#raw[@]}==${#analyser[@]}))||echo "raw/analyser<>1" # both arrays must have the same length
target=()
dodoes_at=0
for ((tmp=0; tmp<${#raw[@]}; tmp++)); do
rawline="${raw[tmp]}"
rawflag="${analyser[tmp]}"
targetline="$rawline" # assume code
((rawflag)) && {
analyser_"${flagname[rawflag]}" || continue # flags: examine and act upon
}
[[ -z "$targetline" ]] || target+=("$targetline") # empty lines are not emitted
done
((dodoes_at)) && {
# in raw code was a does> encountered: split generated code in
# two parts. first part is compiled to defining word, second part
# is separated to an array item associated with defining word.
# dodoes, when executed, knows how to retrieve and
# compile that code to defined word.
if (( ${#target[@]} > dodoes_at )); then # everything ok: there was code between does> and ; - extract and store it.
doescode[${current[$lastword]}]=$(printf "%s\n" "${target[@]:$dodoes_at}") # keyed by function name of the defining word
target=("${target[@]:0:$dodoes_at}")
else # but does> without code ...
unset "target[-1]" # will cause to compile without any does run time action. no code behind does>
fi # results in no additional action at all, therefore the whole does> can be dropped.
}
}
# ----- compile and resolve forward refs ----------------- #fold00
# tell whether xt has moved since the previous call, i.e. whether
# names have been handed out in between. The xt seen is remembered in
# previousheaders, its former value is left in tmp.
# returns 0 if xt changed, 1 otherwise.
newheaders() {
  tmp=$(( previousheaders ))
  previousheaders=$(( xt ))
  (( xt != tmp ))
}
# create a forward ref for word: hand out a function name for it and
# keep it in the unresolved vocabulary. Word names are lower cased, and
# the existence test uses that same lower cased key, so a word which is
# already forward referenced is left alone however it was spelled.
# arg1: word
# returns non-zero if the word was forward referenced already.
forwardref() {
  local word="${1,,}"
  [[ -z ${unresolved["$word"]} ]] && {    # don't re-forwardref an already forward ref'ed word
    newname "$word"                       # generate new function name
    unresolved["$word"]="$name"           # keep in unresolved vocabulary
  }
}
# compile a call to a word which isn't defined yet. A function name is
# handed out and kept in the unresolved vocabulary unless the word is
# forward referenced already. The compiled call always uses the name
# recorded for this word, never whatever the global variable name
# happens to hold from earlier use.
# arg1: word
compile_forwardref() {
  local word="${1,,}"
  if [[ -z ${unresolved["$word"]} ]]; then   # don't re-forwardref an already forward ref'ed word
    newname "$word"                          # generate new function name
    unresolved["$word"]="$name"              # keep in unresolved vocabulary
  fi
  code "${unresolved["$word"]}"              # compile dangling call to unresolved name.
}
# satisfy one single reference, passed as arg1.
# A word found in the search order needs nothing done. Otherwise the
# word must be listed in the dependencies cache lib: a forward ref is
# created for it, and its library file is loaded right away if it
# exists - unless a new word is being built at the moment.
# returns 0 if the word exists or was dealt with, non-zero if it is
# unknown to lib or loading was refused.
satisfy1() {
  exists "$1" "${order[@]}" && return     # word already defined, nothing to do
  local word="${1,,}"
  [[ -n "${lib[$word]}" ]] || return 1    # library of needed word not known
  forwardref "$word"                      # known: create forward ref only
  ((${#raw[@]})) && return 1              # refuse to satisfy while building new word is in progress
  tmp="${libdirs[0]}/${lib["$word"]}"     # path and name of lib file
  [[ -f "$tmp" ]] && from "$tmp"          # load if file exists
  return 0
}
resolving=0 # early termination helper
# try to satisfy everything in the unresolved vocabulary, repeating
# until the header count has stabilised: resolving forward references
# may create new forward references, which need resolving as well.
# Does nothing while compiling, while already resolving, or when no
# unresolved words exist.
resolve() {
  if (( ${#unresolved[@]} == 0 || compiling || resolving )); then
    return 0
  fi
  local file
  local resolving=1                             # prevent recursion
  while newheaders && (( ${#unresolved[@]} > 0 )); do
    for word in "${!unresolved[@]}"; do         # try to resolve all in unresolved vocabulary
      satisfy1 "$word"                          # satisfying a word can create new forward references
    done
  done
}
# reset the compiler: empty the gathered raw code, the generated
# target code, the analyser tags and the actions queued for semicolon.
# NOTE(review): compiler_init is defined a second time here with the
# same four resets as the definition further up in this file; this
# later definition is the one in effect once the file has been read.
compiler_init() {
raw=()
target=()
analyser=() # reinitialise code tags for analyser
conclude=()
}
# turn the code gathered in raw[@] into a function:
# raw is rewritten to target, target becomes the body of the function
# belonging to $lastword, then the compiler is reset.
compile() {
  optimise                  # ${raw[@]} -> ${target[@]}
  use "${target[@]}"        # ${target[@]} -> function of $lastword
  compiler_init
}
# ----- line parser -------------------------------------- #fold00
# strip all leading white space from line.
# The white space run in front is determined first and then removed as
# literal prefix.
minwhitespace() {
  local blanks="${line%%[![:space:]]*}"
  line="${line#"$blanks"}"
}
# look ahead: put the next white space delimited word of line into
# variable word. line itself is left untouched.
peek_word() {
  local rest="${line#"${line%%[![:space:]]*}"}"   # line without leading white space
  word="${rest%%[[:space:]]*}"                    # up to next white space
}
# fetch the next white space delimited word of line into variable
# word. The word and everything in front of it are removed from line,
# as is one single white space character following it.
# returns 0 if a word was found, non-zero if word is empty.
word() {
  peek_word
  line="${line#*"$word"}"       # quotes around word are needed
  line="${line#[[:space:]]}"    # one delimiting white space only
  [[ -n "$word" ]]
}
# cut a delimited string off line into variable word. The delimiter
# itself is removed from line too, leading spaces of the remainder
# are kept. Without delimiter the whole line is taken.
# arg1: delimiter
# returns non-zero if line didn't continue with the delimiter, which
# is the case when the delimiter wasn't found.
parse() {
  if [[ -z "$1" ]]; then
    word="$line"
  else
    word="${line%%"${1}"*}"
  fi
  line="${line#"${word}"}"
  if [[ "${line:0:1}" == "$1" ]]; then
    line="${line:1}"
  else
    return 1
  fi
}
# ----- compiler support words --------------------------- #fold00
# inline compiler: compile the body of a function instead of a call.
# Of the function listing printed by "type", the lines starting with a
# blank are taken as body and injected one by one into the instruction
# stream.
# arg1: xt
codebody() {
  local bodyline
  name "$1"                                       # xt to function name
  while IFS= read -r bodyline; do
    [[ "$bodyline" == " "* ]] && code "$bodyline"
  done < <(type "$name")
}
# give the most recently defined word new run time semantics: the
# function registered for $lastword in the current wordlist is
# (re)defined with the instructions passed, one per line.
# arg1+: new run time code.
use() {
  local _function="${current["$lastword"]}" _source
  _source="$(printf '%s\n' "$@")"
  eval "${_function}() { ${_source}; }"
}
# redefine the function belonging to an xt.
# arg1: xt
# arg2+: new run time code, joined to one line
apply() {
  name "$1"                         # function name of xt, left in $name
  shift
  local _source="$*"
  eval "${name}() { ${_source}; }"
}
# queue actions for semicolon to execute: each argument is appended
# to conclude[@] as one action.
# arg1+: action
conclude() {
  local action
  for action in "$@"; do
    conclude+=("$action")
  done
}
# state dependent action: while compiling, the arguments are handed
# to code, otherwise they are executed right away.
stateless() {
  if ((compiling)); then
    code "$@"
    return
  fi
  eval "$*"
}
# ----- defining words ----------------------------------- #fold00
# the following compile one instruction each. arg1 is an arithmetic
# expression (put, push), a variable name (get, pop) or an xt (call).

# store arg1 on top of stack, without pointer inc
put() {
  local instruction
  printf -v instruction '((s[sp]=%s))' "$1"
  code "$instruction"
}
# push arg1
push() {
  local instruction
  printf -v instruction '((s[++sp]=%s))' "$1"
  code "$instruction"
}
# read top of stack into variable arg1, without pointer dec
get() {
  local instruction
  printf -v instruction '((%s=s[sp]))' "$1"
  code "$instruction"
}
# pop top of stack into variable arg1
pop() {
  local instruction
  printf -v instruction '((%s=s[sp--]))' "$1"
  code "$instruction"
}
# call the function belonging to xt arg1
call() {
  local instruction
  printf -v instruction '%s_%s' "$nameprefix" "$1"
  code "$instruction"
}
# compile a call to a word of the context wordlist. A word not found
# there is reported through notfound.
# arg1: word
execute() {
  tmp="${context[$1]}"
  if [[ -z "$tmp" ]]; then
    notfound "$1"
  fi
  code "$tmp"
}
# begin a new word: the compiler is reset, and a header request for
# the word is put into the instruction stream, followed by its source
# location if a file handle was passed. An empty name is reported
# through missingname.
# arg1: wordname
# arg2: filehandle
# arg3: line number
delayheader() {
  if [[ -z "$1" ]]; then
    missingname
  fi
  compiler_init
  lastword="$1"
  code "$lastword" "$functionheader"
  if [[ -n "$2" ]]; then
    where "$2" "$3"
  fi
}
# create a header right away: the header request is set up, processed
# by optimise at once, and the compiler is reset afterwards.
# arg1: wordname
# arg2: filehandle
# arg3: line number
nowheader() {
  local wordname="$1" handle="$2" lineno="$3"
  delayheader "$wordname" "$handle" "$lineno"
  optimise
  compiler_init
}
# define an inline word from bash code. The line number of the calling
# line is recorded as source location.
# arg1: wordname
# arg2: functionname|code
primitive() {
  local wordname="$1" runtime="$2"
  nowheader "$wordname" "$filehandle" "${BASH_LINENO[0]}"
  use "$runtime"
  inline
}
# begin a colon definition. The source location recorded is $linenr
# if set. Otherwise it is the line number of the calling line, or -
# for definitions in yoda itself made by evaluate - the line number
# two call levels further up.
# arg1: wordname
colon() {
  tmp="${BASH_LINENO[0]}"                         # assume not evaluate
  if (( filehandle == 1 )) && [[ "${FUNCNAME[2]}" == "evaluate" ]]; then
    tmp="${BASH_LINENO[2]}"                       # defined in yoda, by evaluate
  fi
  delayheader "$1" "$filehandle" "${linenr:-$tmp}"
}
# complete a definition: the actions queued in conclude[@] are executed
# in order, the gathered code is compiled to a function, and the
# compiling state is left.
# The loop index nextconclude is global and the array length is
# evaluated anew on every pass.
# NOTE(review): presumably so that actions may queue further actions - confirm.
# The return value is that of the final arithmetic command.
semicolon() {
for ((nextconclude=0; nextconclude<${#conclude[@]};nextconclude++)); do
eval "${conclude[nextconclude]}"
done
compile # compilation is gathered in an array raw. Only
((compiling=false)) # when semicolon completes compilation, is
} # a function created from contents of array.
# define an inline word which pushes a value.
# The current value of dp is recorded in body[] under the xt in
# effect before the definition is completed.
# arg1: name
# arg2: value
constant() {
  local location="${linenr:-${BASH_LINENO[0]}}"
  delayheader "$1" "$filehandle" "$location"
  push "$2"
  body[xt]=$(( dp ))
  semicolon
  inline
}
# define a variable: the next free cell of m is initialised, a bash
# variable of the same name receives the cell's index, and a constant
# of that name pushing the index is defined.
# arg1: name
# arg2: initial value (opt), defaults to 0
variable() {
  local linenr="${linenr:-${BASH_LINENO[0]}}"     # pin source location for constant
  printf -v "$1" '%d' "$dp"
  m[dp]="${2:-0}"
  constant "$1" "$(( dp++ ))"
}
# dual headed variables. in bash their names represent
# indices into m. in yoda that index is pushed and used
# as memory address. therefore, this code for variable foo
# is equivalent: bash: m[foo]=123 yoda: 123 foo !
# built-in variables and constants.
# Each variable occupies the next free cell of m. The definitions stay
# one per line: with linenr unset, the line number of each call is what
# gets recorded as source location of the word.
variable 'shell' "0" # non-zero: evaluate tries unknown words as shell commands
variable 'forwardrefs' "1" # level of forward referencing used by evaluate
# NOTE: here is an interesting bug: last does> test
# only succeeds if here's a variable with value of 1
m[dp++]=1 # anonymous cell holding 1, see note above
variable 'callstack' "0" # non-zero: error and ctrl-c print a call stack
variable 'base' "10" # number base
variable 'state' "0"
constant 'maxuint' "$maxuint"
constant 'maxint' "$maxint"
constant 'msb' "$msb"
constant 'true' "$true"
constant 'false' "false" # passed by name, evaluated arithmetically by the compiled push
constant 'esc' "27"
constant 'bl' "32"
constant 'bits' "$bits"
constant 'cell' "1"
# define a word pushing a value. Unlike a constant, it may not be
# inlined: value may change, inlined code won't.
# arg1: name
# arg2: value
value() {
  local runtime
  printf -v runtime '((s[++sp]=%s))' "$2"
  nowheader "$1" "$filehandle" "${linenr:-${BASH_LINENO[0]}}"
  use "$runtime"
}
# ----- errors, traps, entry points----------------------- #FOLD00
# warm start: bring the interpreter back to a defined state.
# r is emptied. The data stack is emptied on underflow, and - with
# the string stack - whenever clean is set. Input is reset to tty,
# state to interpreting, base to 10, and the compiler is reset.
warm() {
  r=()
  rp=0
  if (( sp < 0 )); then         # always clear stack underflow
    s=(0)
    sp=0
  fi
  if (( clean )); then          # empty stacks
    s=(0)
    sp=0
    ss=()
  fi
  filehandle=0
  linenr=-1
  compiling=$(( false ))
  error=0
  m[base]=10
  line=""                       # signal to evaluate to stop evaluation of current line
  compiler_init
}
coldvector="" # colon word "boot" assigns coldvector. started when compilation completed.
# cold start: warm start, then run coldvector. Without a coldvector
# yoda exits with 1. error restarts interpreter at cold.
cold() {
  if [[ -z "$coldvector" ]]; then
    exit 1
  fi
  warm
  $coldvector
}
callstack() {
printf "%s: " "Call stack"
printf "%s " "${FUNCNAME[@]:5:${m[callstack]}}" # remove callstack and error from output
printf "\n"
}
read -r redbg < <(tput setab 1)         # terminal sequence: red background
read -r normal < <(tput sgr0)           # terminal sequence: attributes off
# report an error and restart the interpreter through cold.
# The input line is shown with the offending word highlighted, followed
# by one "ERROR:" line per argument, the location if a line number is
# known, and a call stack if yoda variable callstack is non-zero.
# File, line number and row are kept in lasterror.
# arg1+: error message(s)
error() {
  local before="${tib%"${word}"*"${line}"}"     # input in front of the offending word
  printf "%s%s %s\n" "$before" "${redbg}${word}${normal}" "$line"
  printf 'ERROR: %s\n' "$@"
  row=$(( ${#tib} - ${#line} - ${#word} ))
  # NOTE: passing file name in lasterror - could pass file handle instead
  lasterror=("$file" "$linenr" "$row")
  if ! (( linenr < 0 )); then
    printf "at %s of %s\n" "$linenr,$row" "$file"
  fi
  if (( m[callstack] )); then
    callstack
  fi
  cold
}
declare -a errortext            # error number -> error message
declare -i nexterror=0          # next free error number
# raise an error by number or name: the error is kept in variable
# error, and its message, followed by the remaining arguments, is
# passed on to error.
# arg1: error number, or name of a variable holding it
# arg2+: additional text
errornr() {
  error="$1"
  local message="${errortext[error]}"
  shift
  error "$message $*"
}
# a somewhat wild construct, sort of attempting
# to emulate Forth's create ... does> here.
# define a new error: a variable named after the error receives the
# next error number, a function of the same name is synthesised which
# raises that error through errornr, and the message is filed under
# the error number.
# arg1: error name
# arg2+: error message
newerror() {
  local errname="$1"
  printf -v "$errname" '%d' "$(( nexterror ))"                  # error name -> error number
  eval "${errname}() { errornr \"${errname}\" \"\$*\"; }"       # synthesise error function with error name
  shift
  errortext[nexterror++]="$*"                                   # error number -> error message
}
newerror 'noerror'         'no error'
newerror 'missingname'     'no or empty name'
newerror 'missing'         'missing'
newerror 'stackunderflow'  'stack underflow'
newerror 'notfound'        'not found:'
newerror 'nosource'        'no source for'
newerror 'stillcompiling'  'still compiling after'
newerror 'unbalancedstack' 'unbalanced stack in file'
newerror 'unstructured'    'unstructured: missing'
newerror 'shellerror'      'shell returned'
newerror 'filenotfound'    'file not found:'
# handler for the break signal. "break" is a configurable item to
# control how yoda responds: 1 exits, 2 restarts through cold, any
# other value just returns. This is due to not having a clean warm
# start procedure.
ctrl-c() {
  printf "%s\n" "break"
  if (( break == 1 )); then
    exit 1
  fi
  if (( m[callstack] )); then
    callstack
  fi
  (( break == 2 )) && cold
}
trap ctrl-c SIGINT
#err() { echo "error trap: $word ${FUNCNAME[1]}"; }
# trap err ERR
# ----- wordlist searching ------------------------------- #fold01
# results array
declare -ai exists # result array: exists[0]=0|xt, [1]=flags, [2]: wid
# look up a word in a list of wordlists, in the order given. The word
# is lower cased before searching. The xt is what follows the last
# underscore of the function name stored in the wordlist.
# arg1:word arg2+:wids
# returns: exists[0]=0|xt, [1]=flags, [2]=wid where word was found
# return value: 0 if found, 1 otherwise
# NOTE: the loop variable wid is not local and therefore changes a
# variable of that name in the calling scope.
exists() {
local context word="${1,,}" # this is what we're looking for
shift
for wid in "$@"; do # in these word lists
declare -n context="wordlist_$wid" # make next wordlist visible
declare -n flags # appease shellcheck
declare -n flags="flags_$wid" # local pointer to the flags of that wordlist
[[ -n "${context[${word}]}" ]] && { # word found in it?
exists=( # yes: return xt, flags and wid as result
"${context[${word}]##*_}"
"${flags[${word}]}"
"$wid"
)
return 0
}
done # not yet found: loop through all wids in arguments
exists=(0) # word not found: return 0
return 1 # and indicate error
}
# ----- patterns ----------------------------------------- #fold00
# convert a number string in a given radix, and push the number or
# compile code pushing it, depending on state (see stateless).
# A string of digits only, with optional leading dash, gives a single
# cell number. A string with one non-digit character which is a dot,
# not in first position, gives a double number as two cells, low cell
# first; the count of digits behind the dot is left in dpl.
# arg1: number string: 123456.
# arg2: radix
# return value: 0 if the string was a number, 1 otherwise
# return values in dh:dl are not masked with $true
alldigits="0123456789abcdefghijklmnopqrstuvwxyz" # cut legal chars for base
number_radix() { # base passed as argument
local word="${1,,}" radix="$2" i j digit
local legaldigits="${alldigits:0:radix}" sign="" dh=0 dl=0
[[ ${word:0:1} == - ]]&&{ # leading dash?
sign="-"
word="${word:1}" # strip dash
}
local tail="${word#*[!"$legaldigits"]}" # remove up to and including first non-digit character
local left="${word%%"$tail"}" # the removed part
[[ -z "$left" ]]&&{ # no non-digit chars in string
((tmp=$sign$radix#$tail&true)) # let bash convert, cut to cell width
stateless "((s[++sp]=$tmp))" # push or compile as single signed or unsigned number
return 0
}
((${#left}>1))||return 1 # leading decimal point: not a number
[[ ${left:0-1} == . ]]&&left="${left:0:-1}" # remove dot
word="${left}${tail}" # recombine number without decimal point
[[ -z "${word//[$legaldigits]/}" ]]||return 1 # number contains other chars than legal digits: not a number
((dpl=${#tail})) # write dot position to dpl
# accumulate the double number dh:dl digit by digit: multiply what has
# been gathered so far by radix, then add the digit.
# NOTE(review): the inner loop appears to multiply the low cell by radix
# bit by bit, collecting in s2 what overflows the cell - confirm.
for ((j=0; j<${#word};j++)); do
digit="${word:j:1}"
((s1=dh*radix, s2=dl, dl=0))
for ((i=bits; i--;)); do
((dl*=2, s2*=2, dl&carry))&&((s2++, dl&=true))
((s2&carry))&&((dl+=radix, dl&carry))&&((s2++))
done
((digit=asc[$digit]-48, digit>9))&&((digit-=39)) # character to digit value, letters continue after 9
((dh=s1+s2, dl+=digit, dl&carry))&&((dh++))
done
[[ -z "$sign" ]]||{ # reapply sign
((dl^=true, dh^=true, dl++, dl&carry))&&((dh++)) # negate: invert both cells, add one, carry into high cell
}
((dl&=true, dh&=true))
stateless "((s[++sp]=$dl, s[++sp]=$dh))" # push or compile signed or unsigned double number
return 0 # indicate success: no need for further examination
}
declare -a patterns             # names of patternlet functions, in order of registration
# register a patternlet.
# arg1: name of patternlet function
add_pattern() {
  patterns+=("$1")
}
# patternlets must return a value=0 if matching, <>0 otherwise.
# return value determines whether search for matching pattern will continue.
add_pattern "number"            # 123 -123 123.456 -123.456
# number in current base
number() {
  number_radix "$word" "${m[base]}"
}
add_pattern "prefixednumber"    # #123 #123.456 #-123 #-123.456 $abc $abc.def $-abc $-abc.def %101 %101.01 %-101 %-101.01
# number with base override by number prefix
prefixednumber() {
  case "$char1" in
    [%\$\#]) number_radix "$tail" "${bases[$char1]}" ;;
    *) return 1 ;;
  esac
}
add_pattern "tick_char" # 'c' and 'c
# character literal: a tick, one character, and optionally a closing
# tick. Pushes the character code or compiles code pushing it.
tick_char() {
  local ascii
  if [[ ${word} != \'? && ${word} != \'?\' ]]; then
    return 1
  fi
  printf -v ascii '%d' "'$tail'"        # leading tick makes printf return the character code
  stateless "s[++sp]=\"${ascii}\""
}
add_pattern "ctrl_char" # ^c and ^C
# control character literal: a caret followed by a letter or an
# opening bracket. Pushes the lowest five bits of the character code,
# or compiles code pushing them.
ctrl_char() {
  local ascii
  [[ ${word,,} == ^[a-z\[] ]] || return 1
  printf -v ascii '%d' "'$tail'"
  stateless "s[++sp]=\"$(( ascii & 31 ))\""
}
add_pattern "string_dquote" # "string", "multi string"
# double quoted string. A string closed within the word is taken as it
# is, otherwise input is parsed up to the closing double quote. The
# string is appended to ss, or code doing so is compiled.
string_dquote() {
  [[ "$char1" == '"' ]] || return 1
  case "$tail" in
    *'"')                                               # "string"
      tmp="${tail%?}"
      ;;
    *)                                                  # "string multi"
      parse '"' || missing "closing double quotes"      # parses into variable "word"
      tmp="$tail $word"
      ;;
  esac
  stateless "ss+=(\"$tmp\")"
}
add_pattern "string_squote" # 'string', 'multi string'
# single quoted string. A string closed within the word is taken as it
# is, otherwise input is parsed up to the closing single quote. The
# string is appended to ss, or code doing so is compiled.
string_squote() {
  [[ "$char1" == "'" ]] || return 1
  case "$tail" in
    *"'")                                               # 'string'
      tmp="${tail%?}"
      ;;
    *)                                                  # 'string multi'
      parse "'" || missing "closing single quote"       # parses into variable "word"
      tmp="$tail $word"
      ;;
  esac
  stateless "ss+=('$tmp')"
}
add_pattern "hashbang" # #!hashbang
# a word starting with #! makes the remainder of the line a comment
hashbang() {
  [[ "$word" == '#!'* ]] && line=""
}
add_pattern "shellcommand" # >shell command
# shell command: a word starting with > and everything up to the next
# semicolon, or up to end of line if there is none, is executed or
# compiled as shell command.
shellcommand() {
  [[ "$char1" == '>' ]] || return 1
  tmp="$tail"                           # >foo... -> foo...
  case "$tmp" in
    *';')                               # semicolon attached to command
      tmp="${tmp%;}"                    # foo; -> foo
      ;;
    *)
      if [[ "$line" == *';'* ]]; then   # semicolon space separated from command
        parse ';'                       # parse input until ;
        tmp+=" $word"
      else                              # no semicolon at all
        tmp+=" $line"
        line=""                         # and removed from further evaluation
      fi
      ;;
  esac
  stateless "$tmp"                      # execute or compile the parsed shell command
  return 0
}
# ----- line interpreter, evaluate------------------------ #fold00
# text compiler and interpreter, the so-called outer interpreter
# interpret/compile the line passed as argument
# in here most compiling and interpreting work is done.
# time to split this thing up, as it's getting a tad unwieldy
#
# Each white space delimited word of the line is dealt with by the
# first of these steps which applies:
# 1. while compiling: execute it if found in the compiler wordlist,
#    compile a call if found in the unresolved wordlist
# 2. found in search order: execute, or while compiling, compile a call
#    or - for inline words - the function body
# 3. a registered patternlet matches
# 4. forward referencing is enabled and the word can be forward
#    referenced or loaded from library
# 5. yoda variable shell is set: word and remainder of line are tried
#    as shell command
# 6. error "not found"
# Evaluation stops when an error is flagged or the stack underflows.
# arg1+: source line
evaluate() {
# line may get modified, and that must be. some words depend on being able to empty it.
# tib remains unaffected unless explicitely modified (query et al), for error reporting.
local line="$*" tib="$line"
while [[ -n "$line" ]]; do # still more to process on line (parsing removes strings from line)
((error || (sp<0)))&&break
word || continue # parse white space delimited word.
local wordlower="${word,,}"
# examine compiler wordlist first
if ((compiling)); then # state specific vocabularies are searched before the search order
[[ -z "${compiler["$wordlower"]}" ]] || { # found in in compiler while compiling
${compiler["$wordlower"]} # word in there are inherently "immediate", therefore they're executed here
continue # because that also deals with inline compilation.
}
[[ -z "${unresolved["$wordlower"]}" ]] || { # found in unresolved while compiling
code "${unresolved[$wordlower]}" # compile it
continue
}
fi
# --- search word in order ---
exists "$wordlower" "${order[@]}" && { # stateless word found?
tmp="${exists[0]}" # xt of word found
((compiling)) && {
((exists[1]&inline)) && {
codebody "$tmp" # inline: compile function body
continue
}
call "$tmp" # not inline: compile function call
continue
}
name "$tmp" # not compiling: convert xt to function name
$name # execute function
continue # done processing this word
}
# --- word not in a searched wordlist
# --- pattern?
local char1="${word:0:1}" tail="${word:1}" # many patternlets look at first char and use the remainder
for pattern in "${patterns[@]}"; do
$pattern && continue 2 # arguments in $word, $char1, $tail
done
# --- can be included?
(( m[forwardrefs] )) && { # forwardrefs can be 0, 1 or 2 for levels of forward referencing.
if ((compiling)); then # level 1 during compilation: forward resolve only word found in library
(( m[forwardrefs] == 2 || ${#lib[$wordlower]} )) && { # level 2 during compilation: forward reference all unknown words
compile_forwardref "$wordlower"
continue
}
else # any level during interpretation: compile from library or throw error
satisfy1 "$wordlower" && { # attempt to load word
exists "$wordlower" "${order[@]}" || notfound "$word" # word not found: error (though it was compiled successfully)
name "${exists[0]}" # into is in search order. Therefore search order is searched for word.
$name # word found in search order -> execute
continue
}
fi
}
# --- can be shelled to?
((m[shell])) && { # done unless shell flag was set
stateless "${word} ${line}" || # in which case execution as shell command is attempted (cmd not found returns 127)
shellerror "$?" # non-zero exit values of shell commands will be passed on to yoda error handler
line="" # assignment sets return value to 0
continue
}
# --- can be reported?
notfound "$word" # nothing helped. get grouchy.
line="" # stop evaluation: nothing left
done
((sp<0))&&stackunderflow
}
# ----- load source files -------------------------------- #fold00
# look up a file in the list of included files.
# filehandle is left pointing to the matching entry of array files,
# or to the end of the array, where the file will be added, if no
# matching entry was found. While multiple inclusion is allowed, files
# aren't recorded multiple times. Entry 0 isn't searched.
# arg1: file name (fully qualified)
# returns 0 if the file is recorded, 1 if it isn't, which allows
# "if loaded ...; then ..."
loaded() {
  local file="$1"
  filehandle=1
  while (( filehandle < ${#files[@]} )) && [[ "${files[filehandle]}" != "$file" ]]; do
    (( filehandle++ ))
  done
  (( filehandle < ${#files[@]} ))
}
# make a wordlist the current one: currentwid is set, and the global
# pointer variables current and flags are directed to that wordlist
# and its header flags.
# arg1: wid
# arg2: any for alternative set of options
set-current() {
  currentwid="$1"
  declare -gn current="wordlist_$currentwid" flags="flags_$currentwid"
}
# remove a word from the wordlist given by currentwid. The function
# behind the word is deleted too if the word is inline and not
# protected.
# arg1: word, expected in current
# ( -- )
trash() {
  local word="${1,,}"
  local -n current="wordlist_$currentwid" flags="flags_$currentwid"
  tmp="${flags[$word]}"
  if (( (tmp & inline) && !(tmp & protected) )); then
    unset -f "${current[$word]}"
  fi
  removeheaderfromcurrent "$word"
}
# trash transient headers
# Every entry of transients[@] has the form "wid word". For each of
# them the wordlist is selected through set-current and the word is
# removed through trash.
# currentwid is local here, so the caller's currentwid is left alone.
# NOTE(review): set-current directs the pointer variables current and
# flags with declare -g, which looks like it changes the global ones
# rather than the locals declared here - confirm that they are restored
# or not relied upon afterwards.
# returns non-zero if there are no transients.
notransients() {
((${#transients[@]})) && { # only if any transients exist at all
local current flags currentwid wid transient word
currentwid="-1" # make sure current will be declared, because local variables
for transient in "${transients[@]}"; do
wid="${transient%% *}" # wid of transient
word="${transient##* }" # transient word
((currentwid == wid)) || set-current "$wid" # must change wids
trash "$word" # remove header
done # repeat for all headers in list of transients
}
}
# nestable source include
# this from differs from the colon word, as it doesn't
# try the different ${libdirs[@]} directories.
# file, possibly with path, must exist, or no go.
# Callers did a check prior to calling.
from() {
[[ -z "$1" ]] && return 2 # no file
local line lines linenr file="" filehandle word # must protect for nested includes.
local stackeffect="$sp" # included files must have net stack effect of 0
if [[ "$1" == */* ]]; then # file contains slash(es): don't search