Skip to content

Commit

Permalink
Initial Linux Aarch64 support
Browse files Browse the repository at this point in the history
* Update asmjit dependency (aarch64 branch)
* Disable USE_DISCORD_RPC by default
* Dump some JIT objects in rpcs3 cache dir
* Add SIGILL handler for all platforms
* Fix resetting zeroing denormals in thread pool
* Refactor most v128:: utils into global gv_** functions
* Refactor PPU interpreter (incomplete), remove "precise"
* - Instruction specializations with multiple accuracy flags
* - Adjust calling convention for speed
* - Removed precise/fast setting, replaced with static
* - Started refactoring interpreters for building at runtime JIT
*   (I got tired of poor compiler optimizations)
* - Expose some accuracy settings (SAT, NJ, VNAN, FPCC)
* - Add exec_bytes PPU thread variable (akin to cycle count)
* PPU LLVM: fix VCTUXS+VCTSXS instruction NaN results
* SPU interpreter: remove "precise" for now (extremely non-portable)
* - As with PPU, settings changed to static/dynamic for interpreters.
* - Precise options will be implemented later
* Fix termination after fatal error dialog
  • Loading branch information
Nekotekina committed Jan 15, 2022
1 parent d6aa834 commit f8ebbbd
Show file tree
Hide file tree
Showing 89 changed files with 20,131 additions and 5,362 deletions.
2 changes: 1 addition & 1 deletion 3rdparty/discord-rpc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
add_library(3rdparty_discordRPC INTERFACE)

# We don't want Discord Rich Presence on the BSDs and other OSes
if (USE_DISCORD_RPC AND (WIN32 OR CMAKE_SYSTEM MATCHES "Linux" OR APPLE))
if (USE_DISCORD_RPC AND (WIN32 OR CMAKE_SYSTEM MATCHES "Linux" OR APPLE) AND COMPILER_X86)
if (WIN32 AND NOT MSVC)
ExternalProject_Add(discordRPC
GIT_REPOSITORY https://github.com/discordapp/discord-rpc
Expand Down
14 changes: 12 additions & 2 deletions 3rdparty/llvm.cmake
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
if(WITH_LLVM)
CHECK_CXX_COMPILER_FLAG("-msse -msse2 -mcx16" COMPILER_X86)
CHECK_CXX_COMPILER_FLAG("-march=armv8-a+lse" COMPILER_ARM)

if(BUILD_LLVM_SUBMODULE)
message(STATUS "LLVM will be built from the submodule.")

set(LLVM_TARGETS_TO_BUILD "X86" CACHE INTERNAL "")
option(LLVM_BUILD_RUNTIME OFF)
option(LLVM_BUILD_TOOLS OFF)
option(LLVM_INCLUDE_BENCHMARKS OFF)
Expand Down Expand Up @@ -61,7 +63,15 @@ if(WITH_LLVM)
endif()
endif()

set(LLVM_LIBS LLVMMCJIT LLVMX86CodeGen LLVMX86AsmParser)
set(LLVM_LIBS LLVMMCJIT)

# When the compiler accepted the x86 flag probe (COMPILER_X86), add the X86
# code generator and assembly parser components.
if(COMPILER_X86)
set(LLVM_LIBS ${LLVM_LIBS} LLVMX86CodeGen LLVMX86AsmParser)
endif()

# When the compiler accepted the ARM flag probe (COMPILER_ARM), add the ARM
# components; note that the X86 components are added in this branch as well.
# NOTE(review): LLVM's "ARM" components are presumably the 32-bit backend; a
# 64-bit ARM host would normally need the AArch64 ones
# (LLVMAArch64CodeGen / LLVMAArch64AsmParser) - confirm against the LLVM build.
if(COMPILER_ARM)
set(LLVM_LIBS ${LLVM_LIBS} LLVMX86CodeGen LLVMX86AsmParser LLVMARMCodeGen LLVMARMAsmParser)
endif()

if(WIN32 OR CMAKE_SYSTEM MATCHES "Linux")
set(LLVM_LIBS ${LLVM_LIBS} LLVMIntelJITEvents)
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ option(WITH_LLVM "Enable usage of LLVM library" ON)
option(BUILD_LLVM_SUBMODULE "Build LLVM from git submodule" ON)
option(USE_FAUDIO "FAudio audio backend" ON)
option(USE_LIBEVDEV "libevdev-based joystick support" ON)
option(USE_DISCORD_RPC "Discord rich presence integration" ON)
option(USE_DISCORD_RPC "Discord rich presence integration" OFF)
option(USE_SYSTEM_ZLIB "Prefer system ZLIB instead of the builtin one" ON)
option(USE_VULKAN "Vulkan render backend" ON)
option(USE_PRECOMPILED_HEADERS "Use precompiled headers" OFF)
Expand Down
34 changes: 29 additions & 5 deletions Utilities/JIT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ LOG_CHANNEL(jit_log, "JIT");

void jit_announce(uptr func, usz size, std::string_view name)
{
if (!size)
{
jit_log.error("Empty function announced: %s (%p)", name, func);
return;
}

#ifdef __linux__
static const fs::file s_map(fmt::format("/tmp/perf-%d.map", getpid()), fs::rewrite + fs::append);

Expand Down Expand Up @@ -124,15 +130,31 @@ void* jit_runtime_base::_add(asmjit::CodeHolder* code) noexcept
{
ensure(!code->flatten());
ensure(!code->resolveUnresolvedLinks());
usz codeSize = ensure(code->codeSize());
usz codeSize = code->codeSize();
if (!codeSize)
return nullptr;

auto p = ensure(this->_alloc(codeSize, 64));
ensure(!code->relocateToBase(uptr(p)));

asmjit::VirtMem::ProtectJitReadWriteScope rwScope(p, codeSize);
{
asmjit::VirtMem::ProtectJitReadWriteScope rwScope(p, codeSize);

for (asmjit::Section* section : code->_sections)
for (asmjit::Section* section : code->_sections)
{
std::memcpy(p + section->offset(), section->data(), section->bufferSize());
}
}

if (!dump_name.empty())
{
std::memcpy(p + section->offset(), section->data(), section->bufferSize());
// If directory ASMJIT doesn't exist, nothing will be written
fs::file dump(fmt::format("%s/ASMJIT/%s", fs::get_cache_dir(), dump_name), fs::rewrite);

if (dump)
{
dump.write(p, codeSize);
}
}

return p;
Expand Down Expand Up @@ -349,8 +371,9 @@ static u64 make_null_function(const std::string& name)
using namespace asmjit;

// Build a "null" function that contains its name
const auto func = build_function_asm<void (*)()>("NULL", [&](x86::Assembler& c, auto& args)
const auto func = build_function_asm<void (*)()>("NULL", [&](native_asm& c, auto& args)
{
#if defined(ARCH_X64)
Label data = c.newLabel();
c.lea(args[0], x86::qword_ptr(data, 0));
c.jmp(Imm(&null));
Expand All @@ -362,6 +385,7 @@ static u64 make_null_function(const std::string& name)
c.db(ch);
c.db(0);
c.align(AlignMode::kData, 16);
#endif
});

func_ptr = reinterpret_cast<u64>(func);
Expand Down
104 changes: 92 additions & 12 deletions Utilities/JIT.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,17 @@
#pragma GCC diagnostic ignored "-Wredundant-decls"
#pragma GCC diagnostic ignored "-Wnon-virtual-dtor"
#pragma GCC diagnostic ignored "-Weffc++"
#ifndef __clang__
#ifdef __clang__
#pragma GCC diagnostic ignored "-Wdeprecated-anon-enum-enum-conversion"
#pragma GCC diagnostic ignored "-Wcast-qual"
#else
#pragma GCC diagnostic ignored "-Wduplicated-branches"
#pragma GCC diagnostic ignored "-Wdeprecated-enum-enum-conversion"
#endif
#include <asmjit/asmjit.h>
#if defined(ARCH_ARM64)
#include <asmjit/a64.h>
#endif
#pragma GCC diagnostic pop
#endif

Expand All @@ -36,6 +43,14 @@
#include <string_view>
#include <unordered_map>

// Architecture-selected aliases:
//  - native_asm:  the asmjit assembler type for the build target;
//  - native_args: an array of four general-purpose registers of that target.
// Neither alias is declared when the build defines neither ARCH_X64 nor
// ARCH_ARM64, so code using them will fail to compile on other targets.
#if defined(ARCH_X64)
using native_asm = asmjit::x86::Assembler;
using native_args = std::array<asmjit::x86::Gp, 4>;
#elif defined(ARCH_ARM64)
using native_asm = asmjit::a64::Assembler;
using native_args = std::array<asmjit::a64::Gp, 4>;
#endif

void jit_announce(uptr func, usz size, std::string_view name);

void jit_announce(auto* func, usz size, std::string_view name)
Expand All @@ -62,6 +77,8 @@ struct jit_runtime_base
const asmjit::Environment& environment() const noexcept;
void* _add(asmjit::CodeHolder* code) noexcept;
virtual uchar* _alloc(usz size, usz align) noexcept = 0;

std::string_view dump_name;
};

// ASMJIT runtime for emitting code in a single 2G region
Expand Down Expand Up @@ -167,11 +184,39 @@ namespace asmjit
}
}

// Emits four moves that load args[0..3] from r13, rbp, r12 and rbx respectively.
// Only implemented for ARCH_X64; on any other target nothing is emitted.
inline void build_init_args_from_ghc(native_asm& c, native_args& args)
{
#if defined(ARCH_X64)
	// TODO: handle case when args don't overlap with r13/rbp/r12/rbx
	// NOTE(review): the moves are emitted back to back with no shuffling, so an
	// args[] entry aliasing a register that a later move still reads would
	// clobber it - presumably the situation the TODO is about; confirm.
	const native_args source_regs = {x86::r13, x86::rbp, x86::r12, x86::rbx};

	for (usz i = 0; i < args.size(); ++i)
	{
		c.mov(args[i], source_regs[i]);
	}
#else
	// Not implemented for this architecture; silence unused-parameter warnings
	static_cast<void>(args);
	static_cast<void>(c);
#endif
}

// Emits four moves that load r13, rbp, r12 and rbx from args[0..3] respectively
// (the reverse direction of build_init_args_from_ghc).
// Only implemented for ARCH_X64; on any other target nothing is emitted.
inline void build_init_ghc_args(native_asm& c, native_args& args)
{
#if defined(ARCH_X64)
	// TODO: handle case when args don't overlap with r13/rbp/r12/rbx
	// NOTE(review): the moves are emitted back to back with no shuffling, so a
	// destination register that also appears as a later args[] source would be
	// overwritten before it is read - presumably what the TODO is about; confirm.
	const native_args target_regs = {x86::r13, x86::rbp, x86::r12, x86::rbx};

	for (usz i = 0; i < args.size(); ++i)
	{
		c.mov(target_regs[i], args[i]);
	}
#else
	// Not implemented for this architecture; silence unused-parameter warnings
	static_cast<void>(args);
	static_cast<void>(c);
#endif
}

using imm_ptr = Imm;
}

// Build runtime function with an asmjit assembler (native_asm unless another Asm type is given)
template <typename FT, typename F>
template <typename FT, typename Asm = native_asm, typename F>
inline FT build_function_asm(std::string_view name, F&& builder)
{
using namespace asmjit;
Expand All @@ -181,7 +226,8 @@ inline FT build_function_asm(std::string_view name, F&& builder)
CodeHolder code;
code.init(rt.environment());

std::array<x86::Gp, 4> args;
#if defined(ARCH_X64)
native_args args;
#ifdef _WIN32
args[0] = x86::rcx;
args[1] = x86::rdx;
Expand All @@ -193,16 +239,27 @@ inline FT build_function_asm(std::string_view name, F&& builder)
args[2] = x86::rdx;
args[3] = x86::rcx;
#endif
#elif defined(ARCH_ARM64)
native_args args;
args[0] = a64::x0;
args[1] = a64::x1;
args[2] = a64::x2;
args[3] = a64::x3;
#endif

x86::Assembler compiler(&code);
Asm compiler(&code);
compiler.addEncodingOptions(EncodingOptions::kOptimizedAlign);
builder(std::ref(compiler), args);
if constexpr (std::is_invocable_v<F, Asm&, native_args&>)
builder(compiler, args);
else
builder(compiler);
rt.dump_name = name;
const auto result = rt._add(&code);
jit_announce(result, code.codeSize(), name);
return reinterpret_cast<FT>(uptr(result));
}

#ifdef __APPLE__
#if !defined(ARCH_X64) || defined(__APPLE__)
template <typename FT, usz = 4096>
class built_function
{
Expand All @@ -213,9 +270,23 @@ class built_function

built_function& operator=(const built_function&) = delete;

template <typename F>
built_function(std::string_view name, F&& builder)
: m_func(ensure(build_function_asm<FT>(name, std::forward<F>(builder))))
template <typename F> requires (std::is_invocable_v<F, native_asm&, native_args&>)
built_function(std::string_view name, F&& builder,
u32 line = __builtin_LINE(),
u32 col = __builtin_COLUMN(),
const char* file = __builtin_FILE(),
const char* func = __builtin_FUNCTION())
: m_func(ensure(build_function_asm<FT>(name, std::forward<F>(builder)), const_str(), line, col, file, func))
{
}

template <typename F> requires (std::is_invocable_v<F>)
built_function(std::string_view, F&& getter,
u32 line = __builtin_LINE(),
u32 col = __builtin_COLUMN(),
const char* file = __builtin_FILE(),
const char* func = __builtin_FUNCTION())
: m_func(ensure(getter(), const_str(), line, col, file, func))
{
}

Expand Down Expand Up @@ -251,7 +322,8 @@ class built_function
CodeHolder code;
code.init(rt.environment());

std::array<x86::Gp, 4> args;
#if defined(ARCH_X64)
native_args args;
#ifdef _WIN32
args[0] = x86::rcx;
args[1] = x86::rdx;
Expand All @@ -263,10 +335,18 @@ class built_function
args[2] = x86::rdx;
args[3] = x86::rcx;
#endif
#elif defined(ARCH_ARM64)
native_args args;
args[0] = a64::x0;
args[1] = a64::x1;
args[2] = a64::x2;
args[3] = a64::x3;
#endif

x86::Assembler compiler(&code);
native_asm compiler(&code);
compiler.addEncodingOptions(EncodingOptions::kOptimizedAlign);
builder(std::ref(compiler), args);
builder(compiler, args);
rt.dump_name = name;
jit_announce(rt._add(&code), code.codeSize(), name);
}

Expand Down
2 changes: 1 addition & 1 deletion Utilities/StrFmt.h
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ struct fmt_class_string<T, void>
static void format(std::string& out, u64 arg)
{
const auto& obj = get_object(arg);

void format_byte_array(std::string&, const uchar*, usz);
format_byte_array(out, reinterpret_cast<const uchar*>(std::data(obj)), std::size(obj));
}
Expand Down

0 comments on commit f8ebbbd

Please sign in to comment.