Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
183 changes: 100 additions & 83 deletions core/asm/compiler.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
package asm

import (
"encoding/hex"
"errors"
"fmt"
"math/big"
"os"
Expand All @@ -30,7 +32,7 @@ import (
// and holds the tokens for the program.
type Compiler struct {
tokens []token
binary []interface{}
out []byte

labels map[string]int

Expand All @@ -50,12 +52,10 @@ func NewCompiler(debug bool) *Compiler {
// Feed feeds tokens in to ch and are interpreted by
// the compiler.
//
// feed is the first pass in the compile stage as it
// collects the used labels in the program and keeps a
// program counter which is used to determine the locations
// of the jump dests. The labels can than be used in the
// second stage to push labels and determine the right
// position.
// feed is the first pass in the compile stage as it collects the used labels in the
// program and keeps a program counter which is used to determine the locations of the
// jump dests. The labels can than be used in the second stage to push labels and
// determine the right position.
func (c *Compiler) Feed(ch <-chan token) {
var prev token
for i := range ch {
Expand All @@ -79,7 +79,6 @@ func (c *Compiler) Feed(ch <-chan token) {
c.pc++
}
}

c.tokens = append(c.tokens, i)
prev = i
}
Expand All @@ -88,12 +87,11 @@ func (c *Compiler) Feed(ch <-chan token) {
}
}

// Compile compiles the current tokens and returns a
// binary string that can be interpreted by the CVM
// and an error if it failed.
// Compile compiles the current tokens and returns a binary string that can be interpreted
// by the EVM and an error if it failed.
//
// compile is the second stage in the compile phase
// which compiles the tokens to CVM instructions.
// compile is the second stage in the compile phase which compiles the tokens to EVM
// instructions.
func (c *Compiler) Compile() (string, []error) {
var errors []error
// continue looping over the tokens until
Expand All @@ -105,16 +103,8 @@ func (c *Compiler) Compile() (string, []error) {
}

// turn the binary to hex
var bin strings.Builder
for _, v := range c.binary {
switch v := v.(type) {
case vm.OpCode:
bin.WriteString(fmt.Sprintf("%x", []byte{byte(v)}))
case []byte:
bin.WriteString(fmt.Sprintf("%x", v))
}
}
return bin.String(), errors
h := hex.EncodeToString(c.out)
return h, errors
}

// next returns the next token and increments the
Expand Down Expand Up @@ -156,87 +146,114 @@ func (c *Compiler) compileLine() error {
return nil
}

// compileNumber compiles the number to bytes
func (c *Compiler) compileNumber(element token) {
num := math.MustParseBig256(element.text).Bytes()
if len(num) == 0 {
num = []byte{0}
// parseNumber compiles the number to bytes
func parseNumber(tok token) ([]byte, error) {
if tok.typ != number {
panic("parseNumber of non-number token")
}
num, ok := math.ParseBig256(tok.text)
if !ok {
return nil, errors.New("invalid number")
}
c.pushBin(num)
bytes := num.Bytes()
if len(bytes) == 0 {
bytes = []byte{0}
}
return bytes, nil
}

// compileElement compiles the element (push & label or both)
// to a binary representation and may error if incorrect statements
// where fed.
func (c *Compiler) compileElement(element token) error {
// check for a jump. jumps must be read and compiled
// from right to left.
if isJump(element.text) {
rvalue := c.next()
switch rvalue.typ {
case number:
// TODO figure out how to return the error properly
c.compileNumber(rvalue)
case stringValue:
// strings are quoted, remove them.
c.pushBin(rvalue.text[1 : len(rvalue.text)-2])
case label:
c.pushBin(vm.PUSH4)
pos := big.NewInt(int64(c.labels[rvalue.text])).Bytes()
pos = append(make([]byte, 4-len(pos)), pos...)
c.pushBin(pos)
case lineEnd:
c.pos--
default:
return compileErr(rvalue, rvalue.text, "number, string or label")
}
// push the operation
c.pushBin(toBinary(element.text))
switch {
case isJump(element.text):
return c.compileJump(element.text)
case isPush(element.text):
return c.compilePush()
default:
c.outputOpcode(toBinary(element.text))
return nil
} else if isPush(element.text) {
// handle pushes. pushes are read from left to right.
var value []byte
}
}

rvalue := c.next()
switch rvalue.typ {
case number:
value = math.MustParseBig256(rvalue.text).Bytes()
if len(value) == 0 {
value = []byte{0}
}
case stringValue:
value = []byte(rvalue.text[1 : len(rvalue.text)-1])
case label:
value = big.NewInt(int64(c.labels[rvalue.text])).Bytes()
value = append(make([]byte, 4-len(value)), value...)
default:
return compileErr(rvalue, rvalue.text, "number, string or label")
func (c *Compiler) compileJump(jumpType string) error {
rvalue := c.next()
switch rvalue.typ {
case number:
numBytes, err := parseNumber(rvalue)
if err != nil {
return err
}
c.outputBytes(numBytes)

if len(value) > 32 {
return fmt.Errorf("%d type error: unsupported string or number with size > 32", rvalue.lineno)
}
case stringValue:
// strings are quoted, remove them.
str := rvalue.text[1 : len(rvalue.text)-2]
c.outputBytes([]byte(str))

case label:
c.outputOpcode(vm.PUSH4)
pos := big.NewInt(int64(c.labels[rvalue.text])).Bytes()
pos = append(make([]byte, 4-len(pos)), pos...)
c.outputBytes(pos)

case lineEnd:
// push without argument is supported, it just takes the destination from the stack.
c.pos--

c.pushBin(vm.OpCode(int(vm.PUSH1) - 1 + len(value)))
c.pushBin(value)
} else {
c.pushBin(toBinary(element.text))
default:
return compileErr(rvalue, rvalue.text, "number, string or label")
}
// push the operation
c.outputOpcode(toBinary(jumpType))
return nil
}

func (c *Compiler) compilePush() error {
// handle pushes. pushes are read from left to right.
var value []byte
rvalue := c.next()
switch rvalue.typ {
case number:
value = math.MustParseBig256(rvalue.text).Bytes()
if len(value) == 0 {
value = []byte{0}
}
case stringValue:
value = []byte(rvalue.text[1 : len(rvalue.text)-1])
case label:
value = big.NewInt(int64(c.labels[rvalue.text])).Bytes()
value = append(make([]byte, 4-len(value)), value...)
default:
return compileErr(rvalue, rvalue.text, "number, string or label")
}
if len(value) > 32 {
return fmt.Errorf("%d: string or number size > 32 bytes", rvalue.lineno+1)
}
c.outputOpcode(vm.OpCode(int(vm.PUSH1) - 1 + len(value)))
c.outputBytes(value)
return nil
}

// compileLabel pushes a jumpdest to the binary slice.
func (c *Compiler) compileLabel() {
c.pushBin(vm.JUMPDEST)
c.outputOpcode(vm.JUMPDEST)
}

func (c *Compiler) outputOpcode(op vm.OpCode) {
if c.debug {
fmt.Printf("%d: %v\n", len(c.out), op)
}
c.out = append(c.out, byte(op))
}

// pushBin pushes the value v to the binary stack.
func (c *Compiler) pushBin(v interface{}) {
// output pushes the value v to the binary stack.
func (c *Compiler) outputBytes(b []byte) {
if c.debug {
fmt.Printf("%d: %v\n", len(c.binary), v)
fmt.Printf("%d: %x\n", len(c.out), b)
}
c.binary = append(c.binary, v)
c.out = append(c.out, b...)
}

// isPush returns whether the string op is either any of
Expand All @@ -263,13 +280,13 @@ type compileError struct {
}

func (err compileError) Error() string {
return fmt.Sprintf("%d syntax error: unexpected %v, expected %v", err.lineno, err.got, err.want)
return fmt.Sprintf("%d: syntax error: unexpected %v, expected %v", err.lineno, err.got, err.want)
}

func compileErr(c token, got, want string) error {
return compileError{
got: got,
want: want,
lineno: c.lineno,
lineno: c.lineno + 1,
}
}
8 changes: 8 additions & 0 deletions core/asm/compiler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,14 @@ func TestCompiler(t *testing.T) {
`,
output: "6300000006565b",
},
{
input: `
JUMP @label
label: ;; comment
ADD ;; comment
`,
output: "6300000006565b01",
},
}
for _, test := range tests {
ch := Lex([]byte(test.input), false)
Expand Down
10 changes: 10 additions & 0 deletions core/asm/lex_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,16 @@ func TestLexer(t *testing.T) {
input: "@label123",
tokens: []token{{typ: lineStart}, {typ: label, text: "label123"}, {typ: eof}},
},
// comment after label
{
input: "@label123 ;; comment",
tokens: []token{{typ: lineStart}, {typ: label, text: "label123"}, {typ: eof}},
},
// comment after instruction
{
input: "push 3 ;; comment\nadd",
tokens: []token{{typ: lineStart}, {typ: element, text: "push"}, {typ: number, text: "3"}, {typ: lineEnd, text: "\n"}, {typ: lineStart, lineno: 1}, {typ: element, lineno: 1, text: "add"}, {typ: eof, lineno: 1}},
},
}

for _, test := range tests {
Expand Down
40 changes: 12 additions & 28 deletions core/asm/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ type token struct {
// is able to parse and return.
type tokenType int

//go:generate go run golang.org/x/tools/cmd/stringer -type tokenType

const (
eof tokenType = iota // end of file
lineStart // emitted when a line starts
Expand All @@ -52,31 +54,13 @@ const (
labelDef // label definition is emitted when a new label is found
number // number is emitted when a number is found
stringValue // stringValue is emitted when a string has been found

Numbers = "1234567890" // characters representing any decimal number
HexadecimalNumbers = Numbers + "aAbBcCdDeEfF" // characters representing any hexadecimal
Alpha = "abcdefghijklmnopqrstuwvxyzABCDEFGHIJKLMNOPQRSTUWVXYZ" // characters representing alphanumeric
)

// String implements stringer
func (it tokenType) String() string {
if int(it) > len(stringtokenTypes) {
return "invalid"
}
return stringtokenTypes[it]
}

var stringtokenTypes = []string{
eof: "EOF",
lineStart: "new line",
lineEnd: "end of line",
invalidStatement: "invalid statement",
element: "element",
label: "label",
labelDef: "label definition",
number: "number",
stringValue: "string",
}
const (
decimalNumbers = "1234567890" // characters representing any decimal number
hexNumbers = decimalNumbers + "aAbBcCdDeEfF" // characters representing any hexadecimal
alpha = "abcdefghijklmnopqrstuwvxyzABCDEFGHIJKLMNOPQRSTUWVXYZ" // characters representing alphanumeric
)

// lexer is the basic construct for parsing
// source code and turning them in to tokens.
Expand Down Expand Up @@ -200,7 +184,6 @@ func lexLine(l *lexer) stateFn {
l.emit(lineEnd)
l.ignore()
l.lineno++

l.emit(lineStart)
case r == ';' && l.peek() == ';':
return lexComment
Expand All @@ -225,6 +208,7 @@ func lexLine(l *lexer) stateFn {
// of the line and discards the text.
func lexComment(l *lexer) stateFn {
l.acceptRunUntil('\n')
l.backup()
l.ignore()

return lexLine
Expand All @@ -234,7 +218,7 @@ func lexComment(l *lexer) stateFn {
// the lex text state function to advance the parsing
// process.
func lexLabel(l *lexer) stateFn {
l.acceptRun(Alpha + "_" + Numbers)
l.acceptRun(alpha + "_" + decimalNumbers)

l.emit(label)

Expand All @@ -253,9 +237,9 @@ func lexInsideString(l *lexer) stateFn {
}

func lexNumber(l *lexer) stateFn {
acceptance := Numbers
acceptance := decimalNumbers
if l.accept("xX") {
acceptance = HexadecimalNumbers
acceptance = hexNumbers
}
l.acceptRun(acceptance)

Expand All @@ -265,7 +249,7 @@ func lexNumber(l *lexer) stateFn {
}

func lexElement(l *lexer) stateFn {
l.acceptRun(Alpha + "_" + Numbers)
l.acceptRun(alpha + "_" + decimalNumbers)

if l.peek() == ':' {
l.emit(labelDef)
Expand Down
Loading