Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update LLVM [WIP] #5770

Merged
merged 3 commits into from Mar 30, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 4 additions & 4 deletions 3rdparty/llvm.cmake
Expand Up @@ -26,7 +26,7 @@ if(WITH_LLVM)
set(CMAKE_CXX_FLAGS ${CXX_FLAGS_OLD})

# now tries to find LLVM again
find_package(LLVM 8.0 CONFIG)
find_package(LLVM 9.0 CONFIG)
if(NOT LLVM_FOUND)
message(FATAL_ERROR "Couldn't build LLVM from the submodule. You might need to run `git submodule update --init`")
endif()
Expand All @@ -39,11 +39,11 @@ if(WITH_LLVM)
set(LLVM_DIR ${CMAKE_SOURCE_DIR}/${LLVM_DIR})
endif()

find_package(LLVM 8.0 CONFIG)
find_package(LLVM 9.0 CONFIG)

if (NOT LLVM_FOUND)
if (LLVM_VERSION AND LLVM_VERSION_MAJOR LESS 8)
message(FATAL_ERROR "Found LLVM version ${LLVM_VERSION}. Required version 8.0. \
if (LLVM_VERSION AND LLVM_VERSION_MAJOR LESS 9)
message(FATAL_ERROR "Found LLVM version ${LLVM_VERSION}. Required version 9.0. \
Enable BUILD_LLVM_SUBMODULE option to build LLVM from included as a git submodule.")
endif()

Expand Down
2 changes: 1 addition & 1 deletion Utilities/JIT.cpp
Expand Up @@ -570,7 +570,7 @@ struct EventListener : llvm::JITEventListener
{
}

void NotifyObjectEmitted(const llvm::object::ObjectFile& obj, const llvm::RuntimeDyld::LoadedObjectInfo& inf) override
void notifyObjectLoaded(ObjectKey K, const llvm::object::ObjectFile& obj, const llvm::RuntimeDyld::LoadedObjectInfo& inf) override
{
#ifdef _WIN32
for (auto it = obj.section_begin(), end = obj.section_end(); it != end; ++it)
Expand Down
2 changes: 1 addition & 1 deletion llvm
Submodule llvm updated 12839 files
120 changes: 97 additions & 23 deletions rpcs3/Emu/CPU/CPUTranslator.h
Expand Up @@ -992,13 +992,6 @@ class cpu_translator
return (b ^ s) & ~(a ^ b);
}

// Get signed subtraction overflow into the sign bit (d = a - b)
template <typename T>
static inline auto sborrow(T a, T b, T d)
{
return (a ^ b) & (a ^ d);
}

// Bitwise select (c ? a : b)
template <typename T>
static inline auto merge(T c, T a, T b)
Expand All @@ -1014,12 +1007,96 @@ class cpu_translator
return a << (b & mask) | a >> (-b & mask);
}

// Rotate left
// Add with saturation
template <typename T>
static inline auto rol(T a, u64 b)
inline auto add_sat(T a, T b)
{
static constexpr u64 mask = value_t<typename T::type>::esize - 1;
return a << (b & mask) | a >> ((0 - b) & mask);
value_t<typename T::type> result;
const auto eva = a.eval(m_ir);
const auto evb = b.eval(m_ir);

// Compute constant result immediately if possible
if (llvm::isa<llvm::Constant>(eva) && llvm::isa<llvm::Constant>(evb))
{
static_assert(result.is_sint || result.is_uint);

if constexpr (result.is_sint)
{
llvm::Type* cast_to = m_ir->getIntNTy(result.esize * 2);
if constexpr (result.is_vector != 0)
cast_to = llvm::VectorType::get(cast_to, result.is_vector);

const auto axt = m_ir->CreateSExt(eva, cast_to);
const auto bxt = m_ir->CreateSExt(evb, cast_to);
result.value = m_ir->CreateAdd(axt, bxt);
const auto _max = m_ir->getInt(llvm::APInt::getSignedMaxValue(result.esize * 2).ashr(result.esize));
const auto _min = m_ir->getInt(llvm::APInt::getSignedMinValue(result.esize * 2).ashr(result.esize));
const auto smax = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _max) : _max;
const auto smin = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _min) : _min;
result.value = m_ir->CreateSelect(m_ir->CreateICmpSGT(result.value, smax), smax, result.value);
result.value = m_ir->CreateSelect(m_ir->CreateICmpSLT(result.value, smin), smin, result.value);
result.value = m_ir->CreateTrunc(result.value, result.get_type(m_context));
}
else
{
const auto _max = m_ir->getInt(llvm::APInt::getMaxValue(result.esize));
const auto ones = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _max) : _max;
result.value = m_ir->CreateAdd(eva, evb);
result.value = m_ir->CreateSelect(m_ir->CreateICmpULT(result.value, eva), ones, result.value);
}
}
else
{
result.value = m_ir->CreateCall(get_intrinsic<typename T::type>(result.is_sint ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat), {eva, evb});
}

return result;
}

// Subtract with saturation
template <typename T>
inline auto sub_sat(T a, T b)
{
value_t<typename T::type> result;
const auto eva = a.eval(m_ir);
const auto evb = b.eval(m_ir);

// Compute constant result immediately if possible
if (llvm::isa<llvm::Constant>(eva) && llvm::isa<llvm::Constant>(evb))
{
static_assert(result.is_sint || result.is_uint);

if constexpr (result.is_sint)
{
llvm::Type* cast_to = m_ir->getIntNTy(result.esize * 2);
if constexpr (result.is_vector != 0)
cast_to = llvm::VectorType::get(cast_to, result.is_vector);

const auto axt = m_ir->CreateSExt(eva, cast_to);
const auto bxt = m_ir->CreateSExt(evb, cast_to);
result.value = m_ir->CreateSub(axt, bxt);
const auto _max = m_ir->getInt(llvm::APInt::getSignedMaxValue(result.esize * 2).ashr(result.esize));
const auto _min = m_ir->getInt(llvm::APInt::getSignedMinValue(result.esize * 2).ashr(result.esize));
const auto smax = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _max) : _max;
const auto smin = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _min) : _min;
result.value = m_ir->CreateSelect(m_ir->CreateICmpSGT(result.value, smax), smax, result.value);
result.value = m_ir->CreateSelect(m_ir->CreateICmpSLT(result.value, smin), smin, result.value);
result.value = m_ir->CreateTrunc(result.value, result.get_type(m_context));
}
else
{
const auto _min = m_ir->getInt(llvm::APInt::getMinValue(result.esize));
const auto zero = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _min) : _min;
result.value = m_ir->CreateSub(eva, evb);
result.value = m_ir->CreateSelect(m_ir->CreateICmpULT(eva, evb), zero, result.value);
}
}
else
{
result.value = m_ir->CreateCall(get_intrinsic<typename T::type>(result.is_sint ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat), {eva, evb});
}

return result;
}

// Average: (a + b + 1) >> 1
Expand All @@ -1029,18 +1106,15 @@ class cpu_translator
//return (a >> 1) + (b >> 1) + ((a | b) & 1);

value_t<typename T::type> result;
llvm::Instruction::CastOps cast_op = llvm::Instruction::BitCast;
if (result.is_sint)
cast_op = llvm::Instruction::SExt;
if (result.is_uint)
cast_op = llvm::Instruction::ZExt;
llvm::Type* cast_t = m_ir->getIntNTy(result.esize * 2);
if (result.is_vector != 0)
cast_t = llvm::VectorType::get(cast_t, result.is_vector);

const auto axt = m_ir->CreateCast(cast_op, a.eval(m_ir), cast_t);
const auto bxt = m_ir->CreateCast(cast_op, b.eval(m_ir), cast_t);
const auto cxt = llvm::ConstantInt::get(cast_t, 1, false);
static_assert(result.is_sint || result.is_uint);
const auto cast_op = result.is_sint ? llvm::Instruction::SExt : llvm::Instruction::ZExt;
llvm::Type* cast_to = m_ir->getIntNTy(result.esize * 2);
if constexpr (result.is_vector != 0)
cast_to = llvm::VectorType::get(cast_to, result.is_vector);

const auto axt = m_ir->CreateCast(cast_op, a.eval(m_ir), cast_to);
const auto bxt = m_ir->CreateCast(cast_op, b.eval(m_ir), cast_to);
const auto cxt = llvm::ConstantInt::get(cast_to, 1, false);
const auto abc = m_ir->CreateAdd(m_ir->CreateAdd(axt, bxt), cxt);
result.value = m_ir->CreateTrunc(m_ir->CreateLShr(abc, 1), result.get_type(m_context));
return result;
Expand Down
2 changes: 1 addition & 1 deletion rpcs3/Emu/Cell/PPUThread.cpp
Expand Up @@ -1690,7 +1690,7 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
{
if (func.size)
{
const auto f = cast<Function>(module->getOrInsertFunction(func.name, _func));
const auto f = cast<Function>(module->getOrInsertFunction(func.name, _func).getCallee());
f->addAttribute(1, Attribute::NoAlias);
}
}
Expand Down
96 changes: 39 additions & 57 deletions rpcs3/Emu/Cell/PPUTranslator.cpp
Expand Up @@ -267,7 +267,7 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
return;
}

indirect = m_module->getOrInsertFunction(fmt::format("__0x%llx", target), type);
indirect = m_module->getOrInsertFunction(fmt::format("__0x%llx", target), type).getCallee();
}
else
{
Expand Down Expand Up @@ -597,33 +597,27 @@ void PPUTranslator::VADDSBS(ppu_opcode_t op)
{
const auto a = get_vr<s8[16]>(op.va);
const auto b = get_vr<s8[16]>(op.vb);
const auto s = eval(a + b);
const auto z = eval((a >> 7) ^ 0x7f);
const auto x = eval(scarry(a, b, s) >> 7);
set_vr(op.vd, eval(merge(x, z, s)));
SetSat(IsNotZero(x.value));
const auto r = add_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a + b)).value));
}

void PPUTranslator::VADDSHS(ppu_opcode_t op)
{
const auto a = get_vr<s16[8]>(op.va);
const auto b = get_vr<s16[8]>(op.vb);
const auto s = eval(a + b);
const auto z = eval((a >> 15) ^ 0x7fff);
const auto x = eval(scarry(a, b, s) >> 15);
set_vr(op.vd, eval(merge(x, z, s)));
SetSat(IsNotZero(x.value));
const auto r = add_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a + b)).value));
}

void PPUTranslator::VADDSWS(ppu_opcode_t op)
{
const auto a = get_vr<s32[4]>(op.va);
const auto b = get_vr<s32[4]>(op.vb);
const auto s = eval(a + b);
const auto z = eval((a >> 31) ^ 0x7fffffff);
const auto x = eval(scarry(a, b, s) >> 31);
set_vr(op.vd, eval(merge(x, z, s)));
SetSat(IsNotZero(x.value));
const auto r = add_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a + b)).value));
}

void PPUTranslator::VADDUBM(ppu_opcode_t op)
Expand All @@ -637,10 +631,9 @@ void PPUTranslator::VADDUBS(ppu_opcode_t op)
{
const auto a = get_vr<u8[16]>(op.va);
const auto b = get_vr<u8[16]>(op.vb);
const auto s = eval(a + b);
const auto x = eval(s < a);
set_vr(op.vd, select(x, splat<u8[16]>(-1), s));
SetSat(IsNotZero(x.value));
const auto r = add_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a + b)).value));
}

void PPUTranslator::VADDUHM(ppu_opcode_t op)
Expand All @@ -654,10 +647,9 @@ void PPUTranslator::VADDUHS(ppu_opcode_t op)
{
const auto a = get_vr<u16[8]>(op.va);
const auto b = get_vr<u16[8]>(op.vb);
const auto s = eval(a + b);
const auto x = eval(s < a);
set_vr(op.vd, select(x, splat<u16[8]>(-1), s));
SetSat(IsNotZero(x.value));
const auto r = add_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a + b)).value));
}

void PPUTranslator::VADDUWM(ppu_opcode_t op)
Expand All @@ -671,10 +663,9 @@ void PPUTranslator::VADDUWS(ppu_opcode_t op)
{
const auto a = get_vr<u32[4]>(op.va);
const auto b = get_vr<u32[4]>(op.vb);
const auto s = eval(a + b);
const auto x = eval(s < a);
set_vr(op.vd, select(x, splat<u32[4]>(-1), s));
SetSat(IsNotZero(x.value));
const auto r = add_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a + b)).value));
}

void PPUTranslator::VAND(ppu_opcode_t op)
Expand Down Expand Up @@ -1331,8 +1322,8 @@ void PPUTranslator::VRSQRTEFP(ppu_opcode_t op)

void PPUTranslator::VSEL(ppu_opcode_t op)
{
const auto abc = GetVrs(VrType::vi32, op.va, op.vb, op.vc);
SetVr(op.vd, m_ir->CreateOr(m_ir->CreateAnd(abc[1], abc[2]), m_ir->CreateAnd(abc[0], m_ir->CreateNot(abc[2]))));
const auto [a, b, c] = get_vrs<u32[4]>(op.va, op.vb, op.vc);
set_vr(op.vd, eval((b & c) | (a & ~c)));
}

void PPUTranslator::VSL(ppu_opcode_t op)
Expand Down Expand Up @@ -1491,33 +1482,27 @@ void PPUTranslator::VSUBSBS(ppu_opcode_t op)
{
const auto a = get_vr<s8[16]>(op.va);
const auto b = get_vr<s8[16]>(op.vb);
const auto d = eval(a - b);
const auto z = eval((a >> 7) ^ 0x7f);
const auto x = eval(sborrow(a, b, d) >> 7);
set_vr(op.vd, eval(merge(x, z, d)));
SetSat(IsNotZero(x.value));
const auto r = sub_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a - b)).value));
}

void PPUTranslator::VSUBSHS(ppu_opcode_t op)
{
const auto a = get_vr<s16[8]>(op.va);
const auto b = get_vr<s16[8]>(op.vb);
const auto d = eval(a - b);
const auto z = eval((a >> 15) ^ 0x7fff);
const auto x = eval(sborrow(a, b, d) >> 15);
set_vr(op.vd, eval(merge(x, z, d)));
SetSat(IsNotZero(x.value));
const auto r = sub_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a - b)).value));
}

void PPUTranslator::VSUBSWS(ppu_opcode_t op)
{
const auto a = get_vr<s32[4]>(op.va);
const auto b = get_vr<s32[4]>(op.vb);
const auto d = eval(a - b);
const auto z = eval((a >> 31) ^ 0x7fffffff);
const auto x = eval(sborrow(a, b, d) >> 31);
set_vr(op.vd, eval(merge(x, z, d)));
SetSat(IsNotZero(x.value));
const auto r = sub_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a - b)).value));
}

void PPUTranslator::VSUBUBM(ppu_opcode_t op)
Expand All @@ -1531,10 +1516,9 @@ void PPUTranslator::VSUBUBS(ppu_opcode_t op)
{
const auto a = get_vr<u8[16]>(op.va);
const auto b = get_vr<u8[16]>(op.vb);
const auto d = eval(a - b);
const auto x = eval(a < b);
set_vr(op.vd, select(x, splat<u8[16]>(0), d));
SetSat(IsNotZero(x.value));
const auto r = sub_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a - b)).value));
}

void PPUTranslator::VSUBUHM(ppu_opcode_t op)
Expand All @@ -1548,10 +1532,9 @@ void PPUTranslator::VSUBUHS(ppu_opcode_t op)
{
const auto a = get_vr<u16[8]>(op.va);
const auto b = get_vr<u16[8]>(op.vb);
const auto d = eval(a - b);
const auto x = eval(a < b);
set_vr(op.vd, select(x, splat<u16[8]>(0), d));
SetSat(IsNotZero(x.value));
const auto r = sub_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a - b)).value));
}

void PPUTranslator::VSUBUWM(ppu_opcode_t op)
Expand All @@ -1565,10 +1548,9 @@ void PPUTranslator::VSUBUWS(ppu_opcode_t op)
{
const auto a = get_vr<u32[4]>(op.va);
const auto b = get_vr<u32[4]>(op.vb);
const auto d = eval(a - b);
const auto x = eval(a < b);
set_vr(op.vd, select(x, splat<u32[4]>(0), d));
SetSat(IsNotZero(x.value));
const auto r = sub_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a - b)).value));
}

void PPUTranslator::VSUMSWS(ppu_opcode_t op)
Expand Down
8 changes: 7 additions & 1 deletion rpcs3/Emu/Cell/PPUTranslator.h
Expand Up @@ -87,6 +87,12 @@ class PPUTranslator final : public cpu_translator
return result;
}

// Fetch several registers at once, each as value_t<T>, for use with structured bindings.
//   args    - one argument per register; each is forwarded to get_vr<T>.
//   returns - tuple with one value_t<T> per argument, in argument order.
template <typename T, typename... Args>
std::tuple<std::conditional_t<false, Args, value_t<T>>...> get_vrs(const Args&... args)
{
	// conditional_t<false, Args, X> is always X; it only repeats X once per pack element
	using result_t = std::tuple<std::conditional_t<false, Args, value_t<T>>...>;

	// Braced initialization evaluates the get_vr calls strictly left to right
	return result_t{get_vr<T>(args)...};
}

template <typename T>
void set_vr(u32 vr, value_t<T> v)
{
Expand Down Expand Up @@ -296,7 +302,7 @@ class PPUTranslator final : public cpu_translator
llvm::CallInst* Call(llvm::Type* ret, llvm::AttributeList attr, llvm::StringRef name, Args... args)
{
// Call the function
return m_ir->CreateCall(m_module->getOrInsertFunction(name, attr, ret, args->getType()...), {args...});
return m_ir->CreateCall(m_module->getOrInsertFunction(name, attr, ret, args->getType()...).getCallee(), {args...});
}

// Call a function
Expand Down