Skip to content

Commit

Permalink
Use LLVM 9
Browse files Browse the repository at this point in the history
Use new add/sub with saturation intrinsics
  • Loading branch information
Nekotekina committed Mar 29, 2019
1 parent d77fed6 commit 62a8089
Show file tree
Hide file tree
Showing 8 changed files with 148 additions and 92 deletions.
8 changes: 4 additions & 4 deletions 3rdparty/llvm.cmake
Expand Up @@ -26,7 +26,7 @@ if(WITH_LLVM)
set(CMAKE_CXX_FLAGS ${CXX_FLAGS_OLD})

# now tries to find LLVM again
find_package(LLVM 8.0 CONFIG)
find_package(LLVM 9.0 CONFIG)
if(NOT LLVM_FOUND)
message(FATAL_ERROR "Couldn't build LLVM from the submodule. You might need to run `git submodule update --init`")
endif()
Expand All @@ -39,11 +39,11 @@ if(WITH_LLVM)
set(LLVM_DIR ${CMAKE_SOURCE_DIR}/${LLVM_DIR})
endif()

find_package(LLVM 8.0 CONFIG)
find_package(LLVM 9.0 CONFIG)

if (NOT LLVM_FOUND)
if (LLVM_VERSION AND LLVM_VERSION_MAJOR LESS 8)
message(FATAL_ERROR "Found LLVM version ${LLVM_VERSION}. Required version 8.0. \
if (LLVM_VERSION AND LLVM_VERSION_MAJOR LESS 9)
message(FATAL_ERROR "Found LLVM version ${LLVM_VERSION}. Required version 9.0. \
Enable BUILD_LLVM_SUBMODULE option to build LLVM from included as a git submodule.")
endif()

Expand Down
2 changes: 1 addition & 1 deletion Utilities/JIT.cpp
Expand Up @@ -570,7 +570,7 @@ struct EventListener : llvm::JITEventListener
{
}

void NotifyObjectEmitted(const llvm::object::ObjectFile& obj, const llvm::RuntimeDyld::LoadedObjectInfo& inf) override
void notifyObjectLoaded(ObjectKey K, const llvm::object::ObjectFile& obj, const llvm::RuntimeDyld::LoadedObjectInfo& inf) override
{
#ifdef _WIN32
for (auto it = obj.section_begin(), end = obj.section_end(); it != end; ++it)
Expand Down
2 changes: 1 addition & 1 deletion llvm
Submodule llvm updated 12839 files
120 changes: 97 additions & 23 deletions rpcs3/Emu/CPU/CPUTranslator.h
Expand Up @@ -992,13 +992,6 @@ class cpu_translator
return (b ^ s) & ~(a ^ b);
}

// Get signed subtraction overflow into the sign bit (d = a - b)
template <typename T>
static inline auto sborrow(T a, T b, T d)
{
return (a ^ b) & (a ^ d);
}

// Bitwise select (c ? a : b)
template <typename T>
static inline auto merge(T c, T a, T b)
Expand All @@ -1014,12 +1007,96 @@ class cpu_translator
return a << (b & mask) | a >> (-b & mask);
}

// Rotate left
// Add with saturation
template <typename T>
static inline auto rol(T a, u64 b)
inline auto add_sat(T a, T b)
{
static constexpr u64 mask = value_t<typename T::type>::esize - 1;
return a << (b & mask) | a >> ((0 - b) & mask);
value_t<typename T::type> result;
const auto eva = a.eval(m_ir);
const auto evb = b.eval(m_ir);

// Compute constant result immediately if possible
if (llvm::isa<llvm::Constant>(eva) && llvm::isa<llvm::Constant>(evb))
{
static_assert(result.is_sint || result.is_uint);

if constexpr (result.is_sint)
{
llvm::Type* cast_to = m_ir->getIntNTy(result.esize * 2);
if constexpr (result.is_vector != 0)
cast_to = llvm::VectorType::get(cast_to, result.is_vector);

const auto axt = m_ir->CreateSExt(eva, cast_to);
const auto bxt = m_ir->CreateSExt(evb, cast_to);
result.value = m_ir->CreateAdd(axt, bxt);
const auto _max = m_ir->getInt(llvm::APInt::getSignedMaxValue(result.esize * 2).ashr(result.esize));
const auto _min = m_ir->getInt(llvm::APInt::getSignedMinValue(result.esize * 2).ashr(result.esize));
const auto smax = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _max) : _max;
const auto smin = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _min) : _min;
result.value = m_ir->CreateSelect(m_ir->CreateICmpSGT(result.value, smax), smax, result.value);
result.value = m_ir->CreateSelect(m_ir->CreateICmpSLT(result.value, smin), smin, result.value);
result.value = m_ir->CreateTrunc(result.value, result.get_type(m_context));
}
else
{
const auto _max = m_ir->getInt(llvm::APInt::getMaxValue(result.esize));
const auto ones = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _max) : _max;
result.value = m_ir->CreateAdd(eva, evb);
result.value = m_ir->CreateSelect(m_ir->CreateICmpULT(result.value, eva), ones, result.value);
}
}
else
{
result.value = m_ir->CreateCall(get_intrinsic<typename T::type>(result.is_sint ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat), {eva, evb});
}

return result;
}

// Subtract with saturation: emits IR computing (a - b) clamped to the range of the element type.
// Both operands are evaluated through m_ir; the result is returned as value_t<typename T::type>.
// Element type must be a signed or unsigned integer (enforced by the static_assert below).
template <typename T>
inline auto sub_sat(T a, T b)
{
value_t<typename T::type> result;
const auto eva = a.eval(m_ir);
const auto evb = b.eval(m_ir);

// Compute constant result immediately if possible
// (plain sext/sub/icmp/select/trunc are emitted instead of an intrinsic call;
// presumably so the IR builder can fold them on constants - TODO confirm)
if (llvm::isa<llvm::Constant>(eva) && llvm::isa<llvm::Constant>(evb))
{
static_assert(result.is_sint || result.is_uint);

if constexpr (result.is_sint)
{
// Signed path: widen to twice the element size so the subtraction cannot overflow
llvm::Type* cast_to = m_ir->getIntNTy(result.esize * 2);
if constexpr (result.is_vector != 0)
cast_to = llvm::VectorType::get(cast_to, result.is_vector);

const auto axt = m_ir->CreateSExt(eva, cast_to);
const auto bxt = m_ir->CreateSExt(evb, cast_to);
result.value = m_ir->CreateSub(axt, bxt);
// Signed max/min of the double-width type, arithmetically shifted right by esize:
// yields the max/min of the original element width, expressed in the wide type
const auto _max = m_ir->getInt(llvm::APInt::getSignedMaxValue(result.esize * 2).ashr(result.esize));
const auto _min = m_ir->getInt(llvm::APInt::getSignedMinValue(result.esize * 2).ashr(result.esize));
// Broadcast the bounds to every lane for vector types
const auto smax = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _max) : _max;
const auto smin = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _min) : _min;
// Clamp the wide difference to [smin, smax], then narrow back to the element type
result.value = m_ir->CreateSelect(m_ir->CreateICmpSGT(result.value, smax), smax, result.value);
result.value = m_ir->CreateSelect(m_ir->CreateICmpSLT(result.value, smin), smin, result.value);
result.value = m_ir->CreateTrunc(result.value, result.get_type(m_context));
}
else
{
// Unsigned path: the floor is the minimum unsigned value of the element width (named `zero`)
const auto _min = m_ir->getInt(llvm::APInt::getMinValue(result.esize));
const auto zero = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _min) : _min;
result.value = m_ir->CreateSub(eva, evb);
// a < b (unsigned) means the subtraction wrapped around: replace with the floor
result.value = m_ir->CreateSelect(m_ir->CreateICmpULT(eva, evb), zero, result.value);
}
}
else
{
// Non-constant operands: call the ssub_sat/usub_sat intrinsic (signed/unsigned) obtained via get_intrinsic
result.value = m_ir->CreateCall(get_intrinsic<typename T::type>(result.is_sint ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat), {eva, evb});
}

return result;
}

// Average: (a + b + 1) >> 1
Expand All @@ -1029,18 +1106,15 @@ class cpu_translator
//return (a >> 1) + (b >> 1) + ((a | b) & 1);

value_t<typename T::type> result;
llvm::Instruction::CastOps cast_op = llvm::Instruction::BitCast;
if (result.is_sint)
cast_op = llvm::Instruction::SExt;
if (result.is_uint)
cast_op = llvm::Instruction::ZExt;
llvm::Type* cast_t = m_ir->getIntNTy(result.esize * 2);
if (result.is_vector != 0)
cast_t = llvm::VectorType::get(cast_t, result.is_vector);

const auto axt = m_ir->CreateCast(cast_op, a.eval(m_ir), cast_t);
const auto bxt = m_ir->CreateCast(cast_op, b.eval(m_ir), cast_t);
const auto cxt = llvm::ConstantInt::get(cast_t, 1, false);
static_assert(result.is_sint || result.is_uint);
const auto cast_op = result.is_sint ? llvm::Instruction::SExt : llvm::Instruction::ZExt;
llvm::Type* cast_to = m_ir->getIntNTy(result.esize * 2);
if constexpr (result.is_vector != 0)
cast_to = llvm::VectorType::get(cast_to, result.is_vector);

const auto axt = m_ir->CreateCast(cast_op, a.eval(m_ir), cast_to);
const auto bxt = m_ir->CreateCast(cast_op, b.eval(m_ir), cast_to);
const auto cxt = llvm::ConstantInt::get(cast_to, 1, false);
const auto abc = m_ir->CreateAdd(m_ir->CreateAdd(axt, bxt), cxt);
result.value = m_ir->CreateTrunc(m_ir->CreateLShr(abc, 1), result.get_type(m_context));
return result;
Expand Down
2 changes: 1 addition & 1 deletion rpcs3/Emu/Cell/PPUThread.cpp
Expand Up @@ -1690,7 +1690,7 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
{
if (func.size)
{
const auto f = cast<Function>(module->getOrInsertFunction(func.name, _func));
const auto f = cast<Function>(module->getOrInsertFunction(func.name, _func).getCallee());
f->addAttribute(1, Attribute::NoAlias);
}
}
Expand Down
92 changes: 37 additions & 55 deletions rpcs3/Emu/Cell/PPUTranslator.cpp
Expand Up @@ -267,7 +267,7 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
return;
}

indirect = m_module->getOrInsertFunction(fmt::format("__0x%llx", target), type);
indirect = m_module->getOrInsertFunction(fmt::format("__0x%llx", target), type).getCallee();
}
else
{
Expand Down Expand Up @@ -597,33 +597,27 @@ void PPUTranslator::VADDSBS(ppu_opcode_t op)
{
const auto a = get_vr<s8[16]>(op.va);
const auto b = get_vr<s8[16]>(op.vb);
const auto s = eval(a + b);
const auto z = eval((a >> 7) ^ 0x7f);
const auto x = eval(scarry(a, b, s) >> 7);
set_vr(op.vd, eval(merge(x, z, s)));
SetSat(IsNotZero(x.value));
const auto r = add_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a + b)).value));
}

void PPUTranslator::VADDSHS(ppu_opcode_t op)
{
const auto a = get_vr<s16[8]>(op.va);
const auto b = get_vr<s16[8]>(op.vb);
const auto s = eval(a + b);
const auto z = eval((a >> 15) ^ 0x7fff);
const auto x = eval(scarry(a, b, s) >> 15);
set_vr(op.vd, eval(merge(x, z, s)));
SetSat(IsNotZero(x.value));
const auto r = add_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a + b)).value));
}

void PPUTranslator::VADDSWS(ppu_opcode_t op)
{
const auto a = get_vr<s32[4]>(op.va);
const auto b = get_vr<s32[4]>(op.vb);
const auto s = eval(a + b);
const auto z = eval((a >> 31) ^ 0x7fffffff);
const auto x = eval(scarry(a, b, s) >> 31);
set_vr(op.vd, eval(merge(x, z, s)));
SetSat(IsNotZero(x.value));
const auto r = add_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a + b)).value));
}

void PPUTranslator::VADDUBM(ppu_opcode_t op)
Expand All @@ -637,10 +631,9 @@ void PPUTranslator::VADDUBS(ppu_opcode_t op)
{
const auto a = get_vr<u8[16]>(op.va);
const auto b = get_vr<u8[16]>(op.vb);
const auto s = eval(a + b);
const auto x = eval(s < a);
set_vr(op.vd, select(x, splat<u8[16]>(-1), s));
SetSat(IsNotZero(x.value));
const auto r = add_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a + b)).value));
}

void PPUTranslator::VADDUHM(ppu_opcode_t op)
Expand All @@ -654,10 +647,9 @@ void PPUTranslator::VADDUHS(ppu_opcode_t op)
{
const auto a = get_vr<u16[8]>(op.va);
const auto b = get_vr<u16[8]>(op.vb);
const auto s = eval(a + b);
const auto x = eval(s < a);
set_vr(op.vd, select(x, splat<u16[8]>(-1), s));
SetSat(IsNotZero(x.value));
const auto r = add_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a + b)).value));
}

void PPUTranslator::VADDUWM(ppu_opcode_t op)
Expand All @@ -671,10 +663,9 @@ void PPUTranslator::VADDUWS(ppu_opcode_t op)
{
const auto a = get_vr<u32[4]>(op.va);
const auto b = get_vr<u32[4]>(op.vb);
const auto s = eval(a + b);
const auto x = eval(s < a);
set_vr(op.vd, select(x, splat<u32[4]>(-1), s));
SetSat(IsNotZero(x.value));
const auto r = add_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a + b)).value));
}

void PPUTranslator::VAND(ppu_opcode_t op)
Expand Down Expand Up @@ -1491,33 +1482,27 @@ void PPUTranslator::VSUBSBS(ppu_opcode_t op)
{
const auto a = get_vr<s8[16]>(op.va);
const auto b = get_vr<s8[16]>(op.vb);
const auto d = eval(a - b);
const auto z = eval((a >> 7) ^ 0x7f);
const auto x = eval(sborrow(a, b, d) >> 7);
set_vr(op.vd, eval(merge(x, z, d)));
SetSat(IsNotZero(x.value));
const auto r = sub_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a - b)).value));
}

void PPUTranslator::VSUBSHS(ppu_opcode_t op)
{
const auto a = get_vr<s16[8]>(op.va);
const auto b = get_vr<s16[8]>(op.vb);
const auto d = eval(a - b);
const auto z = eval((a >> 15) ^ 0x7fff);
const auto x = eval(sborrow(a, b, d) >> 15);
set_vr(op.vd, eval(merge(x, z, d)));
SetSat(IsNotZero(x.value));
const auto r = sub_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a - b)).value));
}

void PPUTranslator::VSUBSWS(ppu_opcode_t op)
{
const auto a = get_vr<s32[4]>(op.va);
const auto b = get_vr<s32[4]>(op.vb);
const auto d = eval(a - b);
const auto z = eval((a >> 31) ^ 0x7fffffff);
const auto x = eval(sborrow(a, b, d) >> 31);
set_vr(op.vd, eval(merge(x, z, d)));
SetSat(IsNotZero(x.value));
const auto r = sub_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a - b)).value));
}

void PPUTranslator::VSUBUBM(ppu_opcode_t op)
Expand All @@ -1531,10 +1516,9 @@ void PPUTranslator::VSUBUBS(ppu_opcode_t op)
{
const auto a = get_vr<u8[16]>(op.va);
const auto b = get_vr<u8[16]>(op.vb);
const auto d = eval(a - b);
const auto x = eval(a < b);
set_vr(op.vd, select(x, splat<u8[16]>(0), d));
SetSat(IsNotZero(x.value));
const auto r = sub_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a - b)).value));
}

void PPUTranslator::VSUBUHM(ppu_opcode_t op)
Expand All @@ -1548,10 +1532,9 @@ void PPUTranslator::VSUBUHS(ppu_opcode_t op)
{
const auto a = get_vr<u16[8]>(op.va);
const auto b = get_vr<u16[8]>(op.vb);
const auto d = eval(a - b);
const auto x = eval(a < b);
set_vr(op.vd, select(x, splat<u16[8]>(0), d));
SetSat(IsNotZero(x.value));
const auto r = sub_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a - b)).value));
}

void PPUTranslator::VSUBUWM(ppu_opcode_t op)
Expand All @@ -1565,10 +1548,9 @@ void PPUTranslator::VSUBUWS(ppu_opcode_t op)
{
const auto a = get_vr<u32[4]>(op.va);
const auto b = get_vr<u32[4]>(op.vb);
const auto d = eval(a - b);
const auto x = eval(a < b);
set_vr(op.vd, select(x, splat<u32[4]>(0), d));
SetSat(IsNotZero(x.value));
const auto r = sub_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a - b)).value));
}

void PPUTranslator::VSUMSWS(ppu_opcode_t op)
Expand Down
2 changes: 1 addition & 1 deletion rpcs3/Emu/Cell/PPUTranslator.h
Expand Up @@ -296,7 +296,7 @@ class PPUTranslator final : public cpu_translator
llvm::CallInst* Call(llvm::Type* ret, llvm::AttributeList attr, llvm::StringRef name, Args... args)
{
// Call the function
return m_ir->CreateCall(m_module->getOrInsertFunction(name, attr, ret, args->getType()...), {args...});
return m_ir->CreateCall(m_module->getOrInsertFunction(name, attr, ret, args->getType()...).getCallee(), {args...});
}

// Call a function
Expand Down

0 comments on commit 62a8089

Please sign in to comment.