RPCS3 · Nekotekina · Mar 30, 2019 · Mar 28, 2019 · Mar 29, 2019 · Mar 29, 2019
diff --git a/3rdparty/llvm.cmake b/3rdparty/llvm.cmake
@@ -26,7 +26,7 @@ if(WITH_LLVM)
 		set(CMAKE_CXX_FLAGS ${CXX_FLAGS_OLD})
 
 		# now tries to find LLVM again
-		find_package(LLVM 8.0 CONFIG)
+		find_package(LLVM 9.0 CONFIG)
 		if(NOT LLVM_FOUND)
 			message(FATAL_ERROR "Couldn't build LLVM from the submodule. You might need to run `git submodule update --init`")
 		endif()
@@ -39,11 +39,11 @@ if(WITH_LLVM)
 			set(LLVM_DIR ${CMAKE_SOURCE_DIR}/${LLVM_DIR})
 		endif()
 
-		find_package(LLVM 8.0 CONFIG)
+		find_package(LLVM 9.0 CONFIG)
 
 		if (NOT LLVM_FOUND)
-			if (LLVM_VERSION AND LLVM_VERSION_MAJOR LESS 8)
-				message(FATAL_ERROR "Found LLVM version ${LLVM_VERSION}. Required version 8.0. \
+			if (LLVM_VERSION AND LLVM_VERSION_MAJOR LESS 9)
+				message(FATAL_ERROR "Found LLVM version ${LLVM_VERSION}. Required version 9.0. \
 														 Enable BUILD_LLVM_SUBMODULE option to build LLVM from included as a git submodule.")
 			endif()
 

diff --git a/Utilities/JIT.cpp b/Utilities/JIT.cpp
@@ -570,7 +570,7 @@ struct EventListener : llvm::JITEventListener
 	{
 	}
 
-	void NotifyObjectEmitted(const llvm::object::ObjectFile& obj, const llvm::RuntimeDyld::LoadedObjectInfo& inf) override
+	void notifyObjectLoaded(ObjectKey K, const llvm::object::ObjectFile& obj, const llvm::RuntimeDyld::LoadedObjectInfo& inf) override
 	{
 #ifdef _WIN32
 		for (auto it = obj.section_begin(), end = obj.section_end(); it != end; ++it)

diff --git a/llvm b/llvm
diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h
@@ -992,13 +992,6 @@ class cpu_translator
 		return (b ^ s) & ~(a ^ b);
 	}
 
-	// Get signed subtraction overflow into the sign bit (d = a - b)
-	template <typename T>
-	static inline auto sborrow(T a, T b, T d)
-	{
-		return (a ^ b) & (a ^ d);
-	}
-
 	// Bitwise select (c ? a : b)
 	template <typename T>
 	static inline auto merge(T c, T a, T b)
@@ -1014,12 +1007,96 @@ class cpu_translator
 		return a << (b & mask) | a >> (-b & mask);
 	}
 
-	// Rotate left
+	// Add with saturation (constant operands: explicit add/compare/select; otherwise the sadd_sat/uadd_sat intrinsic)
 	template <typename T>
-	static inline auto rol(T a, u64 b)
+	inline auto add_sat(T a, T b)
 	{
-		static constexpr u64 mask = value_t<typename T::type>::esize - 1;
-		return a << (b & mask) | a >> ((0 - b) & mask);
+		value_t<typename T::type> result;
+		const auto eva = a.eval(m_ir);
+		const auto evb = b.eval(m_ir);
+
+		// Compute constant result immediately if possible
+		if (llvm::isa<llvm::Constant>(eva) && llvm::isa<llvm::Constant>(evb))
+		{
+			static_assert(result.is_sint || result.is_uint);
+
+			if constexpr (result.is_sint) // signed element type
+			{
+				llvm::Type* cast_to = m_ir->getIntNTy(result.esize * 2); // integer type twice the element size
+				if constexpr (result.is_vector != 0)
+					cast_to = llvm::VectorType::get(cast_to, result.is_vector);
+
+				const auto axt = m_ir->CreateSExt(eva, cast_to);
+				const auto bxt = m_ir->CreateSExt(evb, cast_to);
+				result.value = m_ir->CreateAdd(axt, bxt); // sum of the sign-extended operands at double width
+				const auto _max = m_ir->getInt(llvm::APInt::getSignedMaxValue(result.esize * 2).ashr(result.esize)); // esize-bit signed maximum, held in the double-width type
+				const auto _min = m_ir->getInt(llvm::APInt::getSignedMinValue(result.esize * 2).ashr(result.esize)); // esize-bit signed minimum, held in the double-width type
+				const auto smax = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _max) : _max;
+				const auto smin = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _min) : _min;
+				result.value = m_ir->CreateSelect(m_ir->CreateICmpSGT(result.value, smax), smax, result.value); // clamp from above
+				result.value = m_ir->CreateSelect(m_ir->CreateICmpSLT(result.value, smin), smin, result.value); // clamp from below
+				result.value = m_ir->CreateTrunc(result.value, result.get_type(m_context)); // narrow back to the result type
+			}
+			else // unsigned element type (guaranteed by the static_assert above)
+			{
+				const auto _max = m_ir->getInt(llvm::APInt::getMaxValue(result.esize)); // all bits set at element width
+				const auto ones = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _max) : _max;
+				result.value = m_ir->CreateAdd(eva, evb);
+				result.value = m_ir->CreateSelect(m_ir->CreateICmpULT(result.value, eva), ones, result.value); // a sum below the first operand means it wrapped; replace with all-ones
+			}
+		}
+		else // at least one operand is not an llvm::Constant
+		{
+			result.value = m_ir->CreateCall(get_intrinsic<typename T::type>(result.is_sint ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat), {eva, evb}); // intrinsic chosen by element signedness
+		}
+
+		return result;
+	}
+
+	// Subtract with saturation (constant operands: explicit sub/compare/select; otherwise the ssub_sat/usub_sat intrinsic)
+	template <typename T>
+	inline auto sub_sat(T a, T b)
+	{
+		value_t<typename T::type> result;
+		const auto eva = a.eval(m_ir);
+		const auto evb = b.eval(m_ir);
+
+		// Compute constant result immediately if possible
+		if (llvm::isa<llvm::Constant>(eva) && llvm::isa<llvm::Constant>(evb))
+		{
+			static_assert(result.is_sint || result.is_uint);
+
+			if constexpr (result.is_sint) // signed element type
+			{
+				llvm::Type* cast_to = m_ir->getIntNTy(result.esize * 2); // integer type twice the element size
+				if constexpr (result.is_vector != 0)
+					cast_to = llvm::VectorType::get(cast_to, result.is_vector);
+
+				const auto axt = m_ir->CreateSExt(eva, cast_to);
+				const auto bxt = m_ir->CreateSExt(evb, cast_to);
+				result.value = m_ir->CreateSub(axt, bxt); // difference of the sign-extended operands at double width
+				const auto _max = m_ir->getInt(llvm::APInt::getSignedMaxValue(result.esize * 2).ashr(result.esize)); // esize-bit signed maximum, held in the double-width type
+				const auto _min = m_ir->getInt(llvm::APInt::getSignedMinValue(result.esize * 2).ashr(result.esize)); // esize-bit signed minimum, held in the double-width type
+				const auto smax = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _max) : _max;
+				const auto smin = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _min) : _min;
+				result.value = m_ir->CreateSelect(m_ir->CreateICmpSGT(result.value, smax), smax, result.value); // clamp from above
+				result.value = m_ir->CreateSelect(m_ir->CreateICmpSLT(result.value, smin), smin, result.value); // clamp from below
+				result.value = m_ir->CreateTrunc(result.value, result.get_type(m_context)); // narrow back to the result type
+			}
+			else // unsigned element type (guaranteed by the static_assert above)
+			{
+				const auto _min = m_ir->getInt(llvm::APInt::getMinValue(result.esize)); // zero at element width
+				const auto zero = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _min) : _min;
+				result.value = m_ir->CreateSub(eva, evb);
+				result.value = m_ir->CreateSelect(m_ir->CreateICmpULT(eva, evb), zero, result.value); // a < b (unsigned) means the difference would go below zero; replace with zero
+			}
+		}
+		else // at least one operand is not an llvm::Constant
+		{
+			result.value = m_ir->CreateCall(get_intrinsic<typename T::type>(result.is_sint ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat), {eva, evb}); // intrinsic chosen by element signedness
+		}
+
+		return result;
 	}
 
 	// Average: (a + b + 1) >> 1
@@ -1029,18 +1106,15 @@ class cpu_translator
 		//return (a >> 1) + (b >> 1) + ((a | b) & 1);
 
 		value_t<typename T::type> result;
-		llvm::Instruction::CastOps cast_op = llvm::Instruction::BitCast;
-		if (result.is_sint)
-			cast_op = llvm::Instruction::SExt;
-		if (result.is_uint)
-			cast_op = llvm::Instruction::ZExt;
-		llvm::Type* cast_t = m_ir->getIntNTy(result.esize * 2);
-		if (result.is_vector != 0)
-			cast_t = llvm::VectorType::get(cast_t, result.is_vector);
-
-		const auto axt = m_ir->CreateCast(cast_op, a.eval(m_ir), cast_t);
-		const auto bxt = m_ir->CreateCast(cast_op, b.eval(m_ir), cast_t);
-		const auto cxt = llvm::ConstantInt::get(cast_t, 1, false);
+		static_assert(result.is_sint || result.is_uint);
+		const auto cast_op = result.is_sint ? llvm::Instruction::SExt : llvm::Instruction::ZExt;
+		llvm::Type* cast_to = m_ir->getIntNTy(result.esize * 2);
+		if constexpr (result.is_vector != 0)
+			cast_to = llvm::VectorType::get(cast_to, result.is_vector);
+
+		const auto axt = m_ir->CreateCast(cast_op, a.eval(m_ir), cast_to);
+		const auto bxt = m_ir->CreateCast(cast_op, b.eval(m_ir), cast_to);
+		const auto cxt = llvm::ConstantInt::get(cast_to, 1, false);
 		const auto abc = m_ir->CreateAdd(m_ir->CreateAdd(axt, bxt), cxt);
 		result.value = m_ir->CreateTrunc(m_ir->CreateLShr(abc, 1), result.get_type(m_context));
 		return result;

diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp
@@ -1690,7 +1690,7 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
 	{
 		if (func.size)
 		{
-			const auto f = cast<Function>(module->getOrInsertFunction(func.name, _func));
+			const auto f = cast<Function>(module->getOrInsertFunction(func.name, _func).getCallee());
 			f->addAttribute(1, Attribute::NoAlias);
 		}
 	}

diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp
@@ -267,7 +267,7 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
 			return;
 		}
 
-		indirect = m_module->getOrInsertFunction(fmt::format("__0x%llx", target), type);
+		indirect = m_module->getOrInsertFunction(fmt::format("__0x%llx", target), type).getCallee();
 	}
 	else
 	{
@@ -597,33 +597,27 @@ void PPUTranslator::VADDSBS(ppu_opcode_t op)
 {
 	const auto a = get_vr<s8[16]>(op.va);
 	const auto b = get_vr<s8[16]>(op.vb);
-	const auto s = eval(a + b);
-	const auto z = eval((a >> 7) ^ 0x7f);
-	const auto x = eval(scarry(a, b, s) >> 7);
-	set_vr(op.vd, eval(merge(x, z, s)));
-	SetSat(IsNotZero(x.value));
+	const auto r = add_sat(a, b);
+	set_vr(op.vd, r);
+	SetSat(IsNotZero(eval(r != (a + b)).value));
 }
 
 void PPUTranslator::VADDSHS(ppu_opcode_t op)
 {
 	const auto a = get_vr<s16[8]>(op.va);
 	const auto b = get_vr<s16[8]>(op.vb);
-	const auto s = eval(a + b);
-	const auto z = eval((a >> 15) ^ 0x7fff);
-	const auto x = eval(scarry(a, b, s) >> 15);
-	set_vr(op.vd, eval(merge(x, z, s)));
-	SetSat(IsNotZero(x.value));
+	const auto r = add_sat(a, b); // saturating add of the two s16[8] operands
+	set_vr(op.vd, r);
+	SetSat(IsNotZero(eval(r != (a + b)).value)); // SetSat input: where the saturated result differs from the plain a + b
 }
 
 void PPUTranslator::VADDSWS(ppu_opcode_t op)
 {
 	const auto a = get_vr<s32[4]>(op.va);
 	const auto b = get_vr<s32[4]>(op.vb);
-	const auto s = eval(a + b);
-	const auto z = eval((a >> 31) ^ 0x7fffffff);
-	const auto x = eval(scarry(a, b, s) >> 31);
-	set_vr(op.vd, eval(merge(x, z, s)));
-	SetSat(IsNotZero(x.value));
+	const auto r = add_sat(a, b); // saturating add of the two s32[4] operands
+	set_vr(op.vd, r);
+	SetSat(IsNotZero(eval(r != (a + b)).value)); // SetSat input: where the saturated result differs from the plain a + b
 }
 
 void PPUTranslator::VADDUBM(ppu_opcode_t op)
@@ -637,10 +631,9 @@ void PPUTranslator::VADDUBS(ppu_opcode_t op)
 {
 	const auto a = get_vr<u8[16]>(op.va);
 	const auto b = get_vr<u8[16]>(op.vb);
-	const auto s = eval(a + b);
-	const auto x = eval(s < a);
-	set_vr(op.vd, select(x, splat<u8[16]>(-1), s));
-	SetSat(IsNotZero(x.value));
+	const auto r = add_sat(a, b);
+	set_vr(op.vd, r);
+	SetSat(IsNotZero(eval(r != (a + b)).value));
 }
 
 void PPUTranslator::VADDUHM(ppu_opcode_t op)
@@ -654,10 +647,9 @@ void PPUTranslator::VADDUHS(ppu_opcode_t op)
 {
 	const auto a = get_vr<u16[8]>(op.va);
 	const auto b = get_vr<u16[8]>(op.vb);
-	const auto s = eval(a + b);
-	const auto x = eval(s < a);
-	set_vr(op.vd, select(x, splat<u16[8]>(-1), s));
-	SetSat(IsNotZero(x.value));
+	const auto r = add_sat(a, b);
+	set_vr(op.vd, r);
+	SetSat(IsNotZero(eval(r != (a + b)).value));
 }
 
 void PPUTranslator::VADDUWM(ppu_opcode_t op)
@@ -671,10 +663,9 @@ void PPUTranslator::VADDUWS(ppu_opcode_t op)
 {
 	const auto a = get_vr<u32[4]>(op.va);
 	const auto b = get_vr<u32[4]>(op.vb);
-	const auto s = eval(a + b);
-	const auto x = eval(s < a);
-	set_vr(op.vd, select(x, splat<u32[4]>(-1), s));
-	SetSat(IsNotZero(x.value));
+	const auto r = add_sat(a, b);
+	set_vr(op.vd, r);
+	SetSat(IsNotZero(eval(r != (a + b)).value));
 }
 
 void PPUTranslator::VAND(ppu_opcode_t op)
@@ -1331,8 +1322,8 @@ void PPUTranslator::VRSQRTEFP(ppu_opcode_t op)
 
 void PPUTranslator::VSEL(ppu_opcode_t op)
 {
-	const auto abc = GetVrs(VrType::vi32, op.va, op.vb, op.vc);
-	SetVr(op.vd, m_ir->CreateOr(m_ir->CreateAnd(abc[1], abc[2]), m_ir->CreateAnd(abc[0], m_ir->CreateNot(abc[2]))));
+	const auto [a, b, c] = get_vrs<u32[4]>(op.va, op.vb, op.vc); // va, vb, vc fetched as u32[4] values
+	set_vr(op.vd, eval((b & c) | (a & ~c))); // bitwise select: bits of b where c is set, bits of a elsewhere
 }
 
 void PPUTranslator::VSL(ppu_opcode_t op)
@@ -1491,33 +1482,27 @@ void PPUTranslator::VSUBSBS(ppu_opcode_t op)
 {
 	const auto a = get_vr<s8[16]>(op.va);
 	const auto b = get_vr<s8[16]>(op.vb);
-	const auto d = eval(a - b);
-	const auto z = eval((a >> 7) ^ 0x7f);
-	const auto x = eval(sborrow(a, b, d) >> 7);
-	set_vr(op.vd, eval(merge(x, z, d)));
-	SetSat(IsNotZero(x.value));
+	const auto r = sub_sat(a, b);
+	set_vr(op.vd, r);
+	SetSat(IsNotZero(eval(r != (a - b)).value));
 }
 
 void PPUTranslator::VSUBSHS(ppu_opcode_t op)
 {
 	const auto a = get_vr<s16[8]>(op.va);
 	const auto b = get_vr<s16[8]>(op.vb);
-	const auto d = eval(a - b);
-	const auto z = eval((a >> 15) ^ 0x7fff);
-	const auto x = eval(sborrow(a, b, d) >> 15);
-	set_vr(op.vd, eval(merge(x, z, d)));
-	SetSat(IsNotZero(x.value));
+	const auto r = sub_sat(a, b); // saturating subtract of the two s16[8] operands
+	set_vr(op.vd, r);
+	SetSat(IsNotZero(eval(r != (a - b)).value)); // SetSat input: where the saturated result differs from the plain a - b
 }
 
 void PPUTranslator::VSUBSWS(ppu_opcode_t op)
 {
 	const auto a = get_vr<s32[4]>(op.va);
 	const auto b = get_vr<s32[4]>(op.vb);
-	const auto d = eval(a - b);
-	const auto z = eval((a >> 31) ^ 0x7fffffff);
-	const auto x = eval(sborrow(a, b, d) >> 31);
-	set_vr(op.vd, eval(merge(x, z, d)));
-	SetSat(IsNotZero(x.value));
+	const auto r = sub_sat(a, b); // saturating subtract of the two s32[4] operands
+	set_vr(op.vd, r);
+	SetSat(IsNotZero(eval(r != (a - b)).value)); // SetSat input: where the saturated result differs from the plain a - b
 }
 
 void PPUTranslator::VSUBUBM(ppu_opcode_t op)
@@ -1531,10 +1516,9 @@ void PPUTranslator::VSUBUBS(ppu_opcode_t op)
 {
 	const auto a = get_vr<u8[16]>(op.va);
 	const auto b = get_vr<u8[16]>(op.vb);
-	const auto d = eval(a - b);
-	const auto x = eval(a < b);
-	set_vr(op.vd, select(x, splat<u8[16]>(0), d));
-	SetSat(IsNotZero(x.value));
+	const auto r = sub_sat(a, b);
+	set_vr(op.vd, r);
+	SetSat(IsNotZero(eval(r != (a - b)).value));
 }
 
 void PPUTranslator::VSUBUHM(ppu_opcode_t op)
@@ -1548,10 +1532,9 @@ void PPUTranslator::VSUBUHS(ppu_opcode_t op)
 {
 	const auto a = get_vr<u16[8]>(op.va);
 	const auto b = get_vr<u16[8]>(op.vb);
-	const auto d = eval(a - b);
-	const auto x = eval(a < b);
-	set_vr(op.vd, select(x, splat<u16[8]>(0), d));
-	SetSat(IsNotZero(x.value));
+	const auto r = sub_sat(a, b);
+	set_vr(op.vd, r);
+	SetSat(IsNotZero(eval(r != (a - b)).value));
 }
 
 void PPUTranslator::VSUBUWM(ppu_opcode_t op)
@@ -1565,10 +1548,9 @@ void PPUTranslator::VSUBUWS(ppu_opcode_t op)
 {
 	const auto a = get_vr<u32[4]>(op.va);
 	const auto b = get_vr<u32[4]>(op.vb);
-	const auto d = eval(a - b);
-	const auto x = eval(a < b);
-	set_vr(op.vd, select(x, splat<u32[4]>(0), d));
-	SetSat(IsNotZero(x.value));
+	const auto r = sub_sat(a, b);
+	set_vr(op.vd, r);
+	SetSat(IsNotZero(eval(r != (a - b)).value));
 }
 
 void PPUTranslator::VSUMSWS(ppu_opcode_t op)

diff --git a/rpcs3/Emu/Cell/PPUTranslator.h b/rpcs3/Emu/Cell/PPUTranslator.h
@@ -87,6 +87,12 @@ class PPUTranslator final : public cpu_translator
 		return result;
 	}
 
+	template <typename T, typename... Args>
+	std::tuple<std::conditional_t<false, Args, value_t<T>>...> get_vrs(const Args&... args) // tuple of one value_t<T> per argument; conditional_t<false, ...> only repeats the type across the pack
+	{
+		return {get_vr<T>(args)...}; // get_vr<T> applied to each argument; the braced list evaluates them left to right
+	}
+
 	template <typename T>
 	void set_vr(u32 vr, value_t<T> v)
 	{
@@ -296,7 +302,7 @@ class PPUTranslator final : public cpu_translator
 	llvm::CallInst* Call(llvm::Type* ret, llvm::AttributeList attr, llvm::StringRef name, Args... args)
 	{
 		// Call the function
-		return m_ir->CreateCall(m_module->getOrInsertFunction(name, attr, ret, args->getType()...), {args...});
+		return m_ir->CreateCall(m_module->getOrInsertFunction(name, attr, ret, args->getType()...).getCallee(), {args...});
 	}
 
 	// Call a function