-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adds base types for the Smalltalk instruction API
This commit provides basic data structures for operating on Smalltalk bytecodes in a friendly way. The base concept is taken from LLVM and other software that deals with basic blocks and so on. Issue: #32
- Loading branch information
Showing
3 changed files
with
226 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
#ifndef LLST_INSTRUCTIONS_INCLUDED | ||
#define LLST_INSTRUCTIONS_INCLUDED | ||
|
||
#include <assert.h> | ||
#include <stdint.h> | ||
#include <vector> | ||
#include <list> | ||
#include <map> | ||
|
||
#include <types.h> | ||
#include <opcodes.h> | ||
|
||
namespace st { | ||
|
||
struct TSmalltalkInstruction { | ||
public: | ||
typedef opcode::Opcode TOpcode; | ||
typedef uint8_t TArgument; | ||
typedef uint16_t TExtra; | ||
typedef uint32_t TUnpackedBytecode; | ||
|
||
TSmalltalkInstruction(TOpcode opcode, TArgument argument = 0, TExtra extra = 0) | ||
: m_opcode(opcode), m_argument(argument), m_extra(extra) {} | ||
|
||
// Initialize instruction from the unpacked value | ||
TSmalltalkInstruction(TUnpackedBytecode bytecode) { | ||
m_opcode = static_cast<TOpcode>(bytecode & 0xFF); | ||
m_argument = static_cast<TArgument>((bytecode >> 8) & 0xFF); | ||
m_extra = static_cast<TExtra>((bytecode >> 16) & 0xFF); | ||
} | ||
|
||
// Decode instruction from method bytecode | ||
// Shifts bytePointer to the next instruction | ||
TSmalltalkInstruction(const TByteObject& byteCodes, uint16_t& bytePointer); | ||
|
||
TOpcode getOpcode() const { return m_opcode; } | ||
TArgument getArgument() const { return m_argument; } | ||
TExtra getExtra() const { return m_extra; } | ||
|
||
// Return fixed width representation of bytecode suitable for storing in arrays | ||
TUnpackedBytecode serialize() const { | ||
return static_cast<uint8_t>(m_opcode) | (m_argument << 8) | (m_extra << 16); | ||
} | ||
|
||
private: | ||
TOpcode m_opcode; | ||
TArgument m_argument; | ||
TExtra m_extra; | ||
}; | ||
|
||
struct TBasicBlock { | ||
public: | ||
typedef std::vector<TSmalltalkInstruction::TUnpackedBytecode> TInstructionVector; | ||
|
||
class iterator { | ||
friend class TBasicBlock; | ||
public: | ||
bool operator ==(const iterator& value) const { return m_instructionIterator == value.m_instructionIterator; } | ||
|
||
void operator ++() { | ||
assert(m_instructionIterator != m_instructions.end()); | ||
++m_instructionIterator; | ||
} | ||
|
||
const TSmalltalkInstruction operator *() const { | ||
assert(m_instructionIterator != m_instructions.end()); | ||
return TSmalltalkInstruction(* m_instructionIterator); | ||
} | ||
|
||
iterator(const iterator& copy) : m_instructionIterator(copy.m_instructionIterator) { } | ||
private: | ||
iterator(const TInstructionVector::iterator& iter) : m_instructionIterator(iter) { } | ||
TInstructionVector::iterator m_instructionIterator; | ||
}; | ||
|
||
iterator begin() { return iterator(m_instructions.begin()); } | ||
iterator end() { return iterator(m_instructions.end()); } | ||
|
||
// Append instruction to the end of basic block | ||
void append(TSmalltalkInstruction instruction) { | ||
m_instructions.push_back(instruction.serialize()); | ||
} | ||
|
||
// Insert instruction at specified position | ||
void insert(const iterator& position, TSmalltalkInstruction instruction) { | ||
m_instructions.insert(position.m_instructionIterator, instruction.serialize()); | ||
} | ||
|
||
// Replace existing instruction at specified position with the new one | ||
void replace(const iterator& position, TSmalltalkInstruction instruction) { | ||
assert(position.m_instructionIterator != m_instructions.end()); | ||
|
||
const TInstructionVector::iterator replacePosition = position.m_instructionIterator; | ||
*replacePosition = instruction.serialize(); | ||
} | ||
|
||
// Remove instruction from basic block | ||
void remove(const iterator& position) { | ||
assert(position.m_instructionIterator != m_instructions.end()); | ||
m_instructions.erase(position.m_instructionIterator); | ||
} | ||
|
||
// Split current basic block at specified position | ||
// Current block will hold instructions prior to the cut position | ||
// Returned block will hold the rest | ||
TBasicBlock* split(const iterator& position) { | ||
TBasicBlock* newBlock = new TBasicBlock; | ||
std::copy(position.m_instructionIterator, m_instructions.end(), newBlock->m_instructions.begin()); | ||
m_instructions.erase(position.m_instructionIterator, m_instructions.end()); | ||
// TODO insert jump instruction and add newBlock to the parsed method | ||
return newBlock; | ||
} | ||
|
||
private: | ||
TInstructionVector m_instructions; | ||
}; | ||
|
||
struct TParsedMethod { | ||
public: | ||
typedef std::list<TBasicBlock*> TBasicBlockList; | ||
|
||
class iterator { | ||
friend class TParsedMethod; | ||
public: | ||
bool operator ==(const iterator& value) const { return m_basicBlockIterator == value.m_basicBlockIterator; } | ||
|
||
void operator ++() { | ||
assert(m_basicBlockIterator != m_basicBlocks.end()); | ||
++m_basicBlockIterator; | ||
} | ||
|
||
TBasicBlock* operator *() const { return * m_basicBlockIterator; } | ||
TBasicBlock* operator ->() const { return * m_basicBlockIterator; } | ||
|
||
iterator(const iterator& copy) : m_basicBlockIterator(copy.m_basicBlockIterator) { } | ||
private: | ||
iterator(const TBasicBlockList::iterator& iter) : m_basicBlockIterator(iter) { } | ||
TBasicBlockList::iterator m_basicBlockIterator; | ||
}; | ||
|
||
iterator begin() { return iterator(m_basicBlocks.begin()); } | ||
iterator end() { return iterator(m_basicBlocks.end()); } | ||
|
||
TBasicBlock* createBasicBlock() { | ||
m_basicBlocks.push_back(new TBasicBlock); | ||
return m_basicBlocks.back(); | ||
} | ||
|
||
TParsedMethod(TMethod* method); | ||
TParsedMethod() {} | ||
|
||
~TParsedMethod() { | ||
for (TBasicBlockList::iterator iBlock = m_basicBlocks.begin(), | ||
end = m_basicBlocks.end(); iBlock != end; ++iBlock) | ||
{ | ||
delete * iBlock; | ||
} | ||
} | ||
|
||
private: | ||
void parse(TMethod* method); | ||
|
||
private: | ||
TBasicBlockList m_basicBlocks; | ||
|
||
typedef std::map<uint16_t, TBasicBlock*> TOffsetToBasicBlockMap; | ||
TOffsetToBasicBlockMap m_offsetToBasicBlock; | ||
}; | ||
|
||
} // namespace st | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
#include <instructions.h> | ||
|
||
using namespace st; | ||
|
||
TSmalltalkInstruction::TSmalltalkInstruction(const TByteObject& byteCodes, uint16_t& bytePointer) | ||
: m_opcode(opcode::extended), m_argument(0), m_extra(0) | ||
{ | ||
const uint8_t& bytecode = byteCodes[bytePointer++]; | ||
|
||
// For normal bytecodes higher part of the byte holds opcode | ||
// whether lower part holds the argument | ||
m_opcode = static_cast<TOpcode>(bytecode >> 4); | ||
m_argument = bytecode & 0x0F; | ||
|
||
// Extended opcodes encode argument in a separate byte | ||
// Opcode is stored in a lower half of the first byte | ||
if (m_opcode == opcode::extended) { | ||
m_opcode = static_cast<TOpcode>(m_argument); | ||
m_argument = byteCodes[bytePointer++]; | ||
} | ||
|
||
// Some instructions hold extra data in a bytes right after instruction | ||
switch (m_opcode) { | ||
case opcode::pushBlock: | ||
// Storing bytecode offset as extra | ||
m_extra = byteCodes[bytePointer] | (byteCodes[bytePointer+1] << 8); | ||
bytePointer += 2; | ||
break; | ||
|
||
case opcode::doPrimitive: | ||
// Primitive number do not fit into lower 4 bits of opcode byte. | ||
// So it is stored in a separate byte right after. Technically, | ||
// this value is an argument for instruction so it would be logical | ||
// to hold it in the argument field. | ||
m_argument = byteCodes[bytePointer++]; | ||
break; | ||
|
||
case opcode::doSpecial: | ||
switch (m_argument) { | ||
case special::branch: | ||
case special::branchIfTrue: | ||
case special::branchIfFalse: | ||
// Storing jump target offset as extra | ||
m_extra = byteCodes[bytePointer] | (byteCodes[bytePointer+1] << 8); | ||
bytePointer += 2; | ||
} | ||
break; | ||
|
||
default: // Nothing to do here | ||
break; | ||
} | ||
} |