CortexFoundation · ucwong · Sep 21, 2023 · Sep 19, 2023
diff --git a/core/asm/compiler.go b/core/asm/compiler.go
@@ -17,6 +17,8 @@
 package asm
 
 import (
+	"encoding/hex"
+	"errors"
 	"fmt"
 	"math/big"
 	"os"
@@ -30,7 +32,7 @@ import (
 // and holds the tokens for the program.
 type Compiler struct {
 	tokens []token
-	binary []interface{}
+	out    []byte
 
 	labels map[string]int
 
@@ -50,12 +52,10 @@ func NewCompiler(debug bool) *Compiler {
 // Feed feeds tokens in to ch and are interpreted by
 // the compiler.
 //
-// feed is the first pass in the compile stage as it
-// collects the used labels in the program and keeps a
-// program counter which is used to determine the locations
-// of the jump dests. The labels can than be used in the
-// second stage to push labels and determine the right
-// position.
+// feed is the first pass in the compile stage as it collects the used labels in the
+// program and keeps a program counter which is used to determine the locations of the
+// jump dests. The labels can then be used in the second stage to push labels and
+// determine the right position.
 func (c *Compiler) Feed(ch <-chan token) {
 	var prev token
 	for i := range ch {
@@ -79,7 +79,6 @@ func (c *Compiler) Feed(ch <-chan token) {
 				c.pc++
 			}
 		}
-
 		c.tokens = append(c.tokens, i)
 		prev = i
 	}
@@ -88,12 +87,11 @@ func (c *Compiler) Feed(ch <-chan token) {
 	}
 }
 
-// Compile compiles the current tokens and returns a
-// binary string that can be interpreted by the CVM
-// and an error if it failed.
+// Compile compiles the current tokens and returns a binary string that can be interpreted
+// by the CVM and an error if it failed.
 //
-// compile is the second stage in the compile phase
-// which compiles the tokens to CVM instructions.
+// compile is the second stage in the compile phase which compiles the tokens to CVM
+// instructions.
 func (c *Compiler) Compile() (string, []error) {
 	var errors []error
 	// continue looping over the tokens until
@@ -105,16 +103,8 @@ func (c *Compiler) Compile() (string, []error) {
 	}
 
 	// turn the binary to hex
-	var bin strings.Builder
-	for _, v := range c.binary {
-		switch v := v.(type) {
-		case vm.OpCode:
-			bin.WriteString(fmt.Sprintf("%x", []byte{byte(v)}))
-		case []byte:
-			bin.WriteString(fmt.Sprintf("%x", v))
-		}
-	}
-	return bin.String(), errors
+	h := hex.EncodeToString(c.out)
+	return h, errors
 }
 
 // next returns the next token and increments the
@@ -156,87 +146,114 @@ func (c *Compiler) compileLine() error {
 	return nil
 }
 
-// compileNumber compiles the number to bytes
-func (c *Compiler) compileNumber(element token) {
-	num := math.MustParseBig256(element.text).Bytes()
-	if len(num) == 0 {
-		num = []byte{0}
+// parseNumber parses the text of a number token into bytes, returning an error if it is not a valid number.
+func parseNumber(tok token) ([]byte, error) {
+	if tok.typ != number {
+		panic("parseNumber of non-number token")
+	}
+	num, ok := math.ParseBig256(tok.text)
+	if !ok {
+		return nil, errors.New("invalid number")
 	}
-	c.pushBin(num)
+	bytes := num.Bytes()
+	if len(bytes) == 0 {
+		bytes = []byte{0}
+	}
+	return bytes, nil
 }
 
 // compileElement compiles the element (push & label or both)
 // to a binary representation and may error if incorrect statements
 // where fed.
 func (c *Compiler) compileElement(element token) error {
-	// check for a jump. jumps must be read and compiled
-	// from right to left.
-	if isJump(element.text) {
-		rvalue := c.next()
-		switch rvalue.typ {
-		case number:
-			// TODO figure out how to return the error properly
-			c.compileNumber(rvalue)
-		case stringValue:
-			// strings are quoted, remove them.
-			c.pushBin(rvalue.text[1 : len(rvalue.text)-2])
-		case label:
-			c.pushBin(vm.PUSH4)
-			pos := big.NewInt(int64(c.labels[rvalue.text])).Bytes()
-			pos = append(make([]byte, 4-len(pos)), pos...)
-			c.pushBin(pos)
-		case lineEnd:
-			c.pos--
-		default:
-			return compileErr(rvalue, rvalue.text, "number, string or label")
-		}
-		// push the operation
-		c.pushBin(toBinary(element.text))
+	switch {
+	case isJump(element.text):
+		return c.compileJump(element.text)
+	case isPush(element.text):
+		return c.compilePush()
+	default:
+		c.outputOpcode(toBinary(element.text))
 		return nil
-	} else if isPush(element.text) {
-		// handle pushes. pushes are read from left to right.
-		var value []byte
+	}
+}
 
-		rvalue := c.next()
-		switch rvalue.typ {
-		case number:
-			value = math.MustParseBig256(rvalue.text).Bytes()
-			if len(value) == 0 {
-				value = []byte{0}
-			}
-		case stringValue:
-			value = []byte(rvalue.text[1 : len(rvalue.text)-1])
-		case label:
-			value = big.NewInt(int64(c.labels[rvalue.text])).Bytes()
-			value = append(make([]byte, 4-len(value)), value...)
-		default:
-			return compileErr(rvalue, rvalue.text, "number, string or label")
+func (c *Compiler) compileJump(jumpType string) error {
+	rvalue := c.next()
+	switch rvalue.typ {
+	case number:
+		numBytes, err := parseNumber(rvalue)
+		if err != nil {
+			return err
 		}
+		c.outputBytes(numBytes)
 
-		if len(value) > 32 {
-			return fmt.Errorf("%d type error: unsupported string or number with size > 32", rvalue.lineno)
-		}
+	case stringValue:
+		// strings are quoted, remove them.
+		str := rvalue.text[1 : len(rvalue.text)-2]
+		c.outputBytes([]byte(str))
+
+	case label:
+		c.outputOpcode(vm.PUSH4)
+		pos := big.NewInt(int64(c.labels[rvalue.text])).Bytes()
+		pos = append(make([]byte, 4-len(pos)), pos...)
+		c.outputBytes(pos)
+
+	case lineEnd:
+		// jump without argument is supported, it just takes the destination from the stack.
+		c.pos--
 
-		c.pushBin(vm.OpCode(int(vm.PUSH1) - 1 + len(value)))
-		c.pushBin(value)
-	} else {
-		c.pushBin(toBinary(element.text))
+	default:
+		return compileErr(rvalue, rvalue.text, "number, string or label")
 	}
+	// push the operation
+	c.outputOpcode(toBinary(jumpType))
+	return nil
+}
 
+func (c *Compiler) compilePush() error {
+	// handle pushes. pushes are read from left to right.
+	var value []byte
+	rvalue := c.next()
+	switch rvalue.typ {
+	case number:
+		value = math.MustParseBig256(rvalue.text).Bytes()
+		if len(value) == 0 {
+			value = []byte{0}
+		}
+	case stringValue:
+		value = []byte(rvalue.text[1 : len(rvalue.text)-1])
+	case label:
+		value = big.NewInt(int64(c.labels[rvalue.text])).Bytes()
+		value = append(make([]byte, 4-len(value)), value...)
+	default:
+		return compileErr(rvalue, rvalue.text, "number, string or label")
+	}
+	if len(value) > 32 {
+		return fmt.Errorf("%d: string or number size > 32 bytes", rvalue.lineno+1)
+	}
+	c.outputOpcode(vm.OpCode(int(vm.PUSH1) - 1 + len(value)))
+	c.outputBytes(value)
 	return nil
 }
 
 // compileLabel pushes a jumpdest to the binary slice.
 func (c *Compiler) compileLabel() {
-	c.pushBin(vm.JUMPDEST)
+	c.outputOpcode(vm.JUMPDEST)
+}
+
+func (c *Compiler) outputOpcode(op vm.OpCode) {
+	if c.debug {
+		fmt.Printf("%d: %v\n", len(c.out), op)
+	}
+	c.out = append(c.out, byte(op))
 }
 
-// pushBin pushes the value v to the binary stack.
-func (c *Compiler) pushBin(v interface{}) {
+// outputBytes appends the bytes b to the compiled output.
+func (c *Compiler) outputBytes(b []byte) {
 	if c.debug {
-		fmt.Printf("%d: %v\n", len(c.binary), v)
+		fmt.Printf("%d: %x\n", len(c.out), b)
 	}
-	c.binary = append(c.binary, v)
+	c.out = append(c.out, b...)
 }
 
 // isPush returns whether the string op is either any of
@@ -263,13 +280,13 @@ type compileError struct {
 }
 
 func (err compileError) Error() string {
-	return fmt.Sprintf("%d syntax error: unexpected %v, expected %v", err.lineno, err.got, err.want)
+	return fmt.Sprintf("%d: syntax error: unexpected %v, expected %v", err.lineno, err.got, err.want)
 }
 
 func compileErr(c token, got, want string) error {
 	return compileError{
 		got:    got,
 		want:   want,
-		lineno: c.lineno,
+		lineno: c.lineno + 1,
 	}
 }
diff --git a/core/asm/compiler_test.go b/core/asm/compiler_test.go
@@ -54,6 +54,14 @@ func TestCompiler(t *testing.T) {
 `,
 			output: "6300000006565b",
 		},
+		{
+			input: `
+	JUMP @label
+label: ;; comment
+	ADD ;; comment
+`,
+			output: "6300000006565b01",
+		},
 	}
 	for _, test := range tests {
 		ch := Lex([]byte(test.input), false)

diff --git a/core/asm/lex_test.go b/core/asm/lex_test.go
@@ -72,6 +72,16 @@ func TestLexer(t *testing.T) {
 			input:  "@label123",
 			tokens: []token{{typ: lineStart}, {typ: label, text: "label123"}, {typ: eof}},
 		},
+		// comment after label
+		{
+			input:  "@label123 ;; comment",
+			tokens: []token{{typ: lineStart}, {typ: label, text: "label123"}, {typ: eof}},
+		},
+		// comment after instruction
+		{
+			input:  "push 3 ;; comment\nadd",
+			tokens: []token{{typ: lineStart}, {typ: element, text: "push"}, {typ: number, text: "3"}, {typ: lineEnd, text: "\n"}, {typ: lineStart, lineno: 1}, {typ: element, lineno: 1, text: "add"}, {typ: eof, lineno: 1}},
+		},
 	}
 
 	for _, test := range tests {

diff --git a/core/asm/lexer.go b/core/asm/lexer.go
@@ -42,6 +42,8 @@ type token struct {
 // is able to parse and return.
 type tokenType int
 
+//go:generate go run golang.org/x/tools/cmd/stringer -type tokenType
+
 const (
 	eof              tokenType = iota // end of file
 	lineStart                         // emitted when a line starts
@@ -52,31 +54,13 @@ const (
 	labelDef                          // label definition is emitted when a new label is found
 	number                            // number is emitted when a number is found
 	stringValue                       // stringValue is emitted when a string has been found
-
-	Numbers            = "1234567890"                                           // characters representing any decimal number
-	HexadecimalNumbers = Numbers + "aAbBcCdDeEfF"                               // characters representing any hexadecimal
-	Alpha              = "abcdefghijklmnopqrstuwvxyzABCDEFGHIJKLMNOPQRSTUWVXYZ" // characters representing alphanumeric
 )
 
-// String implements stringer
-func (it tokenType) String() string {
-	if int(it) > len(stringtokenTypes) {
-		return "invalid"
-	}
-	return stringtokenTypes[it]
-}
-
-var stringtokenTypes = []string{
-	eof:              "EOF",
-	lineStart:        "new line",
-	lineEnd:          "end of line",
-	invalidStatement: "invalid statement",
-	element:          "element",
-	label:            "label",
-	labelDef:         "label definition",
-	number:           "number",
-	stringValue:      "string",
-}
+const (
+	decimalNumbers = "1234567890"                                           // characters representing any decimal number
+	hexNumbers     = decimalNumbers + "aAbBcCdDeEfF"                        // characters representing any hexadecimal
+	alpha          = "abcdefghijklmnopqrstuwvxyzABCDEFGHIJKLMNOPQRSTUWVXYZ" // characters representing alphabetic letters
+)
 
 // lexer is the basic construct for parsing
 // source code and turning them in to tokens.
@@ -200,7 +184,6 @@ func lexLine(l *lexer) stateFn {
 			l.emit(lineEnd)
 			l.ignore()
 			l.lineno++
-
 			l.emit(lineStart)
 		case r == ';' && l.peek() == ';':
 			return lexComment
@@ -225,6 +208,7 @@ func lexLine(l *lexer) stateFn {
 // of the line and discards the text.
 func lexComment(l *lexer) stateFn {
 	l.acceptRunUntil('\n')
+	l.backup()
 	l.ignore()
 
 	return lexLine
@@ -234,7 +218,7 @@ func lexComment(l *lexer) stateFn {
 // the lex text state function to advance the parsing
 // process.
 func lexLabel(l *lexer) stateFn {
-	l.acceptRun(Alpha + "_" + Numbers)
+	l.acceptRun(alpha + "_" + decimalNumbers)
 
 	l.emit(label)
 
@@ -253,9 +237,9 @@ func lexInsideString(l *lexer) stateFn {
 }
 
 func lexNumber(l *lexer) stateFn {
-	acceptance := Numbers
+	acceptance := decimalNumbers
 	if l.accept("xX") {
-		acceptance = HexadecimalNumbers
+		acceptance = hexNumbers
 	}
 	l.acceptRun(acceptance)
 
@@ -265,7 +249,7 @@ func lexNumber(l *lexer) stateFn {
 }
 
 func lexElement(l *lexer) stateFn {
-	l.acceptRun(Alpha + "_" + Numbers)
+	l.acceptRun(alpha + "_" + decimalNumbers)
 
 	if l.peek() == ':' {
 		l.emit(labelDef)