Skip to content

Commit

Permalink
Adds base types for the Smalltalk instruction API
Browse files Browse the repository at this point in the history
This commit provides basic data structures for
working with Smalltalk bytecodes in a friendly way.

The base concept is borrowed from LLVM and other
software that deals with basic blocks and similar structures.

Issue: #32
  • Loading branch information
0x7CFE committed Mar 19, 2014
1 parent 1257df7 commit 67734bc
Show file tree
Hide file tree
Showing 3 changed files with 226 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ set(CPP_FILES
src/TInstruction.cpp
src/TSymbol.cpp
src/vm.cpp

src/TSmalltalkInstruction.cpp
)

if (LLVM)
Expand Down
172 changes: 172 additions & 0 deletions include/instructions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
#ifndef LLST_INSTRUCTIONS_INCLUDED
#define LLST_INSTRUCTIONS_INCLUDED

#include <assert.h>
#include <stdint.h>
#include <vector>
#include <list>
#include <map>

#include <types.h>
#include <opcodes.h>

namespace st {

struct TSmalltalkInstruction {
public:
typedef opcode::Opcode TOpcode;
typedef uint8_t TArgument;
typedef uint16_t TExtra;
typedef uint32_t TUnpackedBytecode;

TSmalltalkInstruction(TOpcode opcode, TArgument argument = 0, TExtra extra = 0)
: m_opcode(opcode), m_argument(argument), m_extra(extra) {}

// Initialize instruction from the unpacked value
TSmalltalkInstruction(TUnpackedBytecode bytecode) {
m_opcode = static_cast<TOpcode>(bytecode & 0xFF);
m_argument = static_cast<TArgument>((bytecode >> 8) & 0xFF);
m_extra = static_cast<TExtra>((bytecode >> 16) & 0xFF);
}

// Decode instruction from method bytecode
// Shifts bytePointer to the next instruction
TSmalltalkInstruction(const TByteObject& byteCodes, uint16_t& bytePointer);

TOpcode getOpcode() const { return m_opcode; }
TArgument getArgument() const { return m_argument; }
TExtra getExtra() const { return m_extra; }

// Return fixed width representation of bytecode suitable for storing in arrays
TUnpackedBytecode serialize() const {
return static_cast<uint8_t>(m_opcode) | (m_argument << 8) | (m_extra << 16);
}

private:
TOpcode m_opcode;
TArgument m_argument;
TExtra m_extra;
};

struct TBasicBlock {
public:
typedef std::vector<TSmalltalkInstruction::TUnpackedBytecode> TInstructionVector;

class iterator {
friend class TBasicBlock;
public:
bool operator ==(const iterator& value) const { return m_instructionIterator == value.m_instructionIterator; }

void operator ++() {
assert(m_instructionIterator != m_instructions.end());
++m_instructionIterator;
}

const TSmalltalkInstruction operator *() const {
assert(m_instructionIterator != m_instructions.end());
return TSmalltalkInstruction(* m_instructionIterator);
}

iterator(const iterator& copy) : m_instructionIterator(copy.m_instructionIterator) { }
private:
iterator(const TInstructionVector::iterator& iter) : m_instructionIterator(iter) { }
TInstructionVector::iterator m_instructionIterator;
};

iterator begin() { return iterator(m_instructions.begin()); }
iterator end() { return iterator(m_instructions.end()); }

// Append instruction to the end of basic block
void append(TSmalltalkInstruction instruction) {
m_instructions.push_back(instruction.serialize());
}

// Insert instruction at specified position
void insert(const iterator& position, TSmalltalkInstruction instruction) {
m_instructions.insert(position.m_instructionIterator, instruction.serialize());
}

// Replace existing instruction at specified position with the new one
void replace(const iterator& position, TSmalltalkInstruction instruction) {
assert(position.m_instructionIterator != m_instructions.end());

const TInstructionVector::iterator replacePosition = position.m_instructionIterator;
*replacePosition = instruction.serialize();
}

// Remove instruction from basic block
void remove(const iterator& position) {
assert(position.m_instructionIterator != m_instructions.end());
m_instructions.erase(position.m_instructionIterator);
}

// Split current basic block at specified position
// Current block will hold instructions prior to the cut position
// Returned block will hold the rest
TBasicBlock* split(const iterator& position) {
TBasicBlock* newBlock = new TBasicBlock;
std::copy(position.m_instructionIterator, m_instructions.end(), newBlock->m_instructions.begin());
m_instructions.erase(position.m_instructionIterator, m_instructions.end());
// TODO insert jump instruction and add newBlock to the parsed method
return newBlock;
}

private:
TInstructionVector m_instructions;
};

// Method represented as a list of basic blocks.
//
// Blocks created through createBasicBlock() are stored as raw pointers in
// m_basicBlocks and are deleted by the destructor.
//
// NOTE(review): the implicitly generated copy constructor and assignment
// operator copy the raw block pointers, so a copied object would delete the
// same blocks twice. Avoid copying TParsedMethod instances.
struct TParsedMethod {
public:
    typedef std::list<TBasicBlock*> TBasicBlockList;

    // Forward iterator over the basic blocks of a method.
    // It only wraps the underlying list iterator and has no access to the
    // owning method, so it cannot check bounds itself: incrementing or
    // dereferencing an iterator equal to end() is the caller's error.
    class iterator {
        friend class TParsedMethod;
    public:
        bool operator ==(const iterator& value) const { return m_basicBlockIterator == value.m_basicBlockIterator; }
        bool operator !=(const iterator& value) const { return m_basicBlockIterator != value.m_basicBlockIterator; }

        // Advance to the next basic block. Must not be called on end().
        void operator ++() {
            ++m_basicBlockIterator;
        }

        TBasicBlock* operator *() const { return * m_basicBlockIterator; }
        TBasicBlock* operator ->() const { return * m_basicBlockIterator; }

        iterator(const iterator& copy) : m_basicBlockIterator(copy.m_basicBlockIterator) { }
    private:
        // Only TParsedMethod may create iterators from raw list positions
        iterator(const TBasicBlockList::iterator& iter) : m_basicBlockIterator(iter) { }
        TBasicBlockList::iterator m_basicBlockIterator;
    };

    iterator begin() { return iterator(m_basicBlocks.begin()); }
    iterator end()   { return iterator(m_basicBlocks.end()); }

    // Allocate a new empty basic block, append it to the block list
    // and return it. The block stays owned by this object.
    TBasicBlock* createBasicBlock() {
        m_basicBlocks.push_back(new TBasicBlock);
        return m_basicBlocks.back();
    }

    TParsedMethod(TMethod* method);
    TParsedMethod() {}

    // Delete every basic block stored in the block list
    ~TParsedMethod() {
        for (TBasicBlockList::iterator iBlock = m_basicBlocks.begin(),
            end = m_basicBlocks.end(); iBlock != end; ++iBlock)
        {
            delete * iBlock;
        }
    }

private:
    void parse(TMethod* method);

private:
    TBasicBlockList m_basicBlocks;

    // Maps a 16 bit offset to a basic block
    // NOTE(review): presumably the bytecode offset at which the block starts — confirm in parse()
    typedef std::map<uint16_t, TBasicBlock*> TOffsetToBasicBlockMap;
    TOffsetToBasicBlockMap m_offsetToBasicBlock;
};

} // namespace st

#endif
52 changes: 52 additions & 0 deletions src/TSmalltalkInstruction.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#include <instructions.h>

using namespace st;

// Decode a single instruction from byteCodes starting at bytePointer.
// On return bytePointer has been advanced past every byte that was consumed.
TSmalltalkInstruction::TSmalltalkInstruction(const TByteObject& byteCodes, uint16_t& bytePointer)
    : m_opcode(opcode::extended), m_argument(0), m_extra(0)
{
    const uint8_t leadByte = byteCodes[bytePointer++];

    // Regular encoding: high nibble is the opcode, low nibble is the argument
    m_opcode   = static_cast<TOpcode>(leadByte >> 4);
    m_argument = leadByte & 0x0F;

    // Extended encoding: the low nibble of the first byte is the real opcode
    // and the argument occupies the whole following byte
    if (m_opcode == opcode::extended) {
        m_opcode   = static_cast<TOpcode>(m_argument);
        m_argument = byteCodes[bytePointer++];
    }

    // Figure out whether the instruction is followed by additional data
    bool hasTrailingOffset = false;

    if (m_opcode == opcode::pushBlock) {
        // Bytecode offset follows the instruction
        hasTrailingOffset = true;
    } else if (m_opcode == opcode::doPrimitive) {
        // The primitive number does not fit into the low nibble of the
        // opcode byte, so it is read from the next byte. It is stored in
        // the argument field, replacing the value decoded above.
        m_argument = byteCodes[bytePointer++];
    } else if (m_opcode == opcode::doSpecial) {
        // Branch instructions are followed by the jump target offset
        hasTrailingOffset = m_argument == special::branch
                         || m_argument == special::branchIfTrue
                         || m_argument == special::branchIfFalse;
    }

    if (hasTrailingOffset) {
        // The offset is stored as two bytes, low byte first, and kept as extra
        m_extra = byteCodes[bytePointer] | (byteCodes[bytePointer + 1] << 8);
        bytePointer += 2;
    }
}

0 comments on commit 67734bc

Please sign in to comment.