Skip to content

Commit

Permalink
SPU: basic function analysis implemented
Browse files Browse the repository at this point in the history
Basic stack frame layout analysis.
Function detection in Giga mode.
Basic use of new information in SPU LLVM.

This is WIP and may not work correctly.
Optimizations include, but are not limited to:
 * Compiling SPU functions as native functions when eligible
 * Avoiding register context write-out
  • Loading branch information
Nekotekina committed May 9, 2019
1 parent 7ead021 commit a45aa5d
Show file tree
Hide file tree
Showing 13 changed files with 1,447 additions and 465 deletions.
25 changes: 5 additions & 20 deletions Utilities/JIT.cpp
Expand Up @@ -474,7 +474,7 @@ struct MemoryManager : llvm::RTDyldMemoryManager
s_unfire.push_front(std::make_pair(addr, size));
#endif

return RTDyldMemoryManager::registerEHFrames(addr, load_addr, size);
return RTDyldMemoryManager::registerEHFramesInProcess(addr, size);
}

void deregisterEHFrames() override
Expand Down Expand Up @@ -508,6 +508,10 @@ struct MemoryManager2 : llvm::RTDyldMemoryManager

// Register an EH frame range. On non-Windows builds the range is handed to the
// in-process registration routine and then remembered in s_unfire; on Windows
// this override does nothing.
void registerEHFrames(u8* addr, u64 load_addr, std::size_t size) override
{
#ifndef _WIN32
	// Register with the base implementation first
	RTDyldMemoryManager::registerEHFramesInProcess(addr, size);

	// Remember the (address, size) pair at the front of s_unfire
	auto frame_range = std::make_pair(addr, size);
	s_unfire.push_front(std::move(frame_range));
#endif
}

void deregisterEHFrames() override
Expand Down Expand Up @@ -770,25 +774,6 @@ jit_compiler::~jit_compiler()
{
}

// Test SSSE3 feature: returns false when m_cpu matches one of the CPU names
// listed below, true for any other name.
bool jit_compiler::has_ssse3() const
{
	// CPU names treated as lacking SSSE3
	static constexpr const char* s_without_ssse3[] =
	{
		"generic",
		"k8",
		"opteron",
		"athlon64",
		"athlon-fx",
		"k8-sse3",
		"opteron-sse3",
		"athlon64-sse3",
		"amdfam10",
		"barcelona",
	};

	for (const char* const name : s_without_ssse3)
	{
		if (m_cpu == name)
		{
			return false;
		}
	}

	return true;
}

void jit_compiler::add(std::unique_ptr<llvm::Module> module, const std::string& path)
{
ObjectCache cache{path};
Expand Down
3 changes: 0 additions & 3 deletions Utilities/JIT.h
Expand Up @@ -142,9 +142,6 @@ class jit_compiler final
return *m_engine;
}

// Test SSSE3 feature
bool has_ssse3() const;

// Add module (path to obj cache dir)
void add(std::unique_ptr<llvm::Module> module, const std::string& path);

Expand Down
47 changes: 47 additions & 0 deletions rpcs3/Emu/CPU/CPUTranslator.cpp
Expand Up @@ -9,7 +9,54 @@ cpu_translator::cpu_translator(llvm::Module* module, bool is_be)
, m_module(module)
, m_is_be(is_be)
{
}

// Bind the LLVM context and execution engine, then derive m_use_ssse3 from the
// target CPU name reported by the engine's target machine.
void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngine& engine)
{
	m_context = context;
	m_engine = &engine;

	const auto cpu = m_engine->getTargetMachine()->getTargetCPU();

	// Test SSSE3 feature (TODO): CPU names for which SSSE3 is disabled
	static constexpr const char* s_no_ssse3[] =
	{
		"generic",
		"k8",
		"opteron",
		"athlon64",
		"athlon-fx",
		"k8-sse3",
		"opteron-sse3",
		"athlon64-sse3",
		"amdfam10",
		"barcelona",
	};

	bool ssse3_enabled = true;

	for (const char* const name : s_no_ssse3)
	{
		if (cpu == name)
		{
			ssse3_enabled = false;
			break;
		}
	}

	m_use_ssse3 = ssse3_enabled;
}

// Bitcast `val` to `type`. Throws if the total bit widths differ. Constant
// operands are folded immediately instead of emitting an instruction.
llvm::Value* cpu_translator::bitcast(llvm::Value* val, llvm::Type* type)
{
	// Total width in bits: scalar width, multiplied by the element count for vectors
	const auto total_bits = [](llvm::Type* t) -> uint
	{
		const uint scalar_bits = t->getScalarSizeInBits();
		return t->isVectorTy() ? scalar_bits * t->getVectorNumElements() : scalar_bits;
	};

	const uint dst_bits = total_bits(type);
	const uint src_bits = total_bits(val->getType());

	if (dst_bits != src_bits)
	{
		fmt::throw_exception("cpu_translator::bitcast(): incompatible type sizes (%u vs %u)", dst_bits, src_bits);
	}

	// Constant operand: fold the cast now and verify the folded result
	if (const auto constant = llvm::dyn_cast<llvm::Constant>(val))
	{
		return verify(HERE, llvm::ConstantFoldCastOperand(llvm::Instruction::BitCast, constant, type, m_module->getDataLayout()));
	}

	// Otherwise emit a regular bitcast through the IR builder
	return m_ir->CreateBitCast(val, type);
}

template <>
Expand Down
28 changes: 28 additions & 0 deletions rpcs3/Emu/CPU/CPUTranslator.h
Expand Up @@ -9,6 +9,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Analysis/ConstantFolding.h"
#ifdef _MSC_VER
#pragma warning(pop)
Expand All @@ -19,6 +20,8 @@
#include "../Utilities/StrFmt.h"
#include "../Utilities/BEType.h"
#include "../Utilities/BitField.h"
#include "../Utilities/Log.h"
#include "../Utilities/JIT.h"

#include <unordered_map>
#include <map>
Expand Down Expand Up @@ -2368,6 +2371,9 @@ class cpu_translator
// Module to which all generated code is output
llvm::Module* m_module;

// Execution engine from JIT instance
llvm::ExecutionEngine* m_engine{};

// Endianness, affects vector element numbering (TODO)
bool m_is_be;

Expand All @@ -2377,6 +2383,8 @@ class cpu_translator
// IR builder
llvm::IRBuilder<>* m_ir;

void initialize(llvm::LLVMContext& context, llvm::ExecutionEngine& engine);

public:
// Convert a C++ type to an LLVM type (TODO: remove)
template <typename T>
Expand Down Expand Up @@ -2421,6 +2429,26 @@ class cpu_translator
return result;
}

// Call external function: provide name and function pointer.
// Declares (or reuses) a function called `lame` in m_module whose type is built
// from RT and the types of the passed argument values, maps that name to `_func`
// in the execution engine, and emits the call through the IR builder.
// Only the number of arguments is checked at compile time.
template <typename RT, typename... FArgs, typename... Args>
llvm::CallInst* call(std::string_view lame, RT(*_func)(FArgs...), Args... args)
{
	static_assert(sizeof...(FArgs) == sizeof...(Args), "cpu_translator::call(): unexpected arg number");

	// Function type: return type from RT, parameter types from the actual argument values
	const auto type = llvm::FunctionType::get(get_type<RT>(), {args->getType()...}, false);

	// Get or insert the declaration in the module
	const auto func = llvm::cast<llvm::Function>(m_module->getOrInsertFunction({lame.data(), lame.size()}, type).getCallee());

	// Bind the symbol name to the native function address
	m_engine->addGlobalMapping({lame.data(), lame.size()}, reinterpret_cast<std::uintptr_t>(_func));

	return m_ir->CreateCall(func, {args...});
}

// Bitcast with immediate constant folding
llvm::Value* bitcast(llvm::Value* val, llvm::Type* type);

// Bitcast overload taking the destination type as a C++ template argument,
// converted to an LLVM type via get_type<T>()
template <typename T>
llvm::Value* bitcast(llvm::Value* val)
{
	const auto target_type = get_type<T>();
	return bitcast(val, target_type);
}

template <typename T>
static llvm_placeholder_t<T> match()
{
Expand Down
2 changes: 1 addition & 1 deletion rpcs3/Emu/Cell/PPUInterpreter.cpp
Expand Up @@ -4677,7 +4677,7 @@ bool ppu_interpreter::MTFSB0(ppu_thread& ppu, ppu_opcode_t op)
bool ppu_interpreter::MTFSFI(ppu_thread& ppu, ppu_opcode_t op)
{
const u32 bf = op.crfd * 4;
if (bf != 4 * 4)
if (bf != 4 * 4)
{
// Do nothing on non-FPCC field (TODO)
LOG_WARNING(PPU, "MTFSFI(%d)", op.crfd);
Expand Down
2 changes: 1 addition & 1 deletion rpcs3/Emu/Cell/PPUThread.cpp
Expand Up @@ -1711,7 +1711,7 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
module->setDataLayout(jit.get_engine().getTargetMachine()->createDataLayout());

// Initialize translator
PPUTranslator translator(jit.get_context(), module.get(), module_part, jit.has_ssse3());
PPUTranslator translator(jit.get_context(), module.get(), module_part, jit.get_engine());

// Define some types
const auto _void = Type::getVoidTy(jit.get_context());
Expand Down
2 changes: 1 addition & 1 deletion rpcs3/Emu/Cell/PPUThread.h
Expand Up @@ -79,7 +79,7 @@ class ppu_thread : public cpu_thread
result |= bit;
}

return result;
return result;
}

// Unpack CR bits
Expand Down
5 changes: 2 additions & 3 deletions rpcs3/Emu/Cell/PPUTranslator.cpp
Expand Up @@ -11,14 +11,13 @@ using namespace llvm;

const ppu_decoder<PPUTranslator> s_ppu_decoder;

PPUTranslator::PPUTranslator(LLVMContext& context, Module* module, const ppu_module& info, bool ssse3)
PPUTranslator::PPUTranslator(LLVMContext& context, Module* module, const ppu_module& info, ExecutionEngine& engine)
: cpu_translator(module, false)
, m_info(info)
, m_pure_attr(AttributeList::get(m_context, AttributeList::FunctionIndex, {Attribute::NoUnwind, Attribute::ReadNone}))
{
// Bind context
m_context = context;
m_use_ssse3 = ssse3;
cpu_translator::initialize(context, engine);

// There is no weak linkage on JIT, so let's create variables with different names for each module part
const u32 gsuffix = m_info.name.empty() ? info.funcs[0].addr : info.funcs[0].addr - m_info.segs[0].addr;
Expand Down
2 changes: 1 addition & 1 deletion rpcs3/Emu/Cell/PPUTranslator.h
Expand Up @@ -315,7 +315,7 @@ class PPUTranslator final : public cpu_translator
// Handle compilation errors
void CompilationError(const std::string& error);

PPUTranslator(llvm::LLVMContext& context, llvm::Module* module, const ppu_module& info, bool ssse3);
PPUTranslator(llvm::LLVMContext& context, llvm::Module* module, const ppu_module& info, llvm::ExecutionEngine& engine);
~PPUTranslator();

// Get thread context struct type
Expand Down
27 changes: 18 additions & 9 deletions rpcs3/Emu/Cell/SPUAnalyser.h
Expand Up @@ -11,6 +11,7 @@ struct spu_itype
static constexpr struct branch_tag{} branch{}; // Branch Instructions
static constexpr struct floating_tag{} floating{}; // Floating-Point Instructions
static constexpr struct quadrop_tag{} _quadrop{}; // 4-op Instructions
static constexpr struct xfloat_tag{} xfloat{}; // Instructions producing xfloat values

enum type : unsigned char
{
Expand Down Expand Up @@ -146,24 +147,26 @@ struct spu_itype
FMS, // quadrop_tag last

FA,
DFA,
FS,
DFS,
FM,
FREST,
FRSQEST,
FI,
CSFLT,
CUFLT,
FRDS, // xfloat_tag last

DFA,
DFS,
DFM,
DFMA,
DFNMS,
DFMS,
DFNMA,
FREST,
FRSQEST,
FI,
CSFLT,
FESD,

CFLTS,
CUFLT,
CFLTU,
FRDS,
FESD,
FCEQ,
FCMEQ,
FCGT,
Expand Down Expand Up @@ -252,6 +255,12 @@ struct spu_itype
{
return value >= MPYA && value <= FMS;
}

// Test for xfloat instruction: true when `value` lies in the enumerator range [FMA, FRDS]
friend constexpr bool operator &(type value, xfloat_tag)
{
	return !(value < FMA || value > FRDS);
}
};

struct spu_iflag
Expand Down

0 comments on commit a45aa5d

Please sign in to comment.