Skip to content

Commit

Permalink
Merge branch 'master' into bps
Browse files Browse the repository at this point in the history
  • Loading branch information
elad335 committed Oct 2, 2023
2 parents d104864 + f35ca71 commit 6a30dc6
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 6 deletions.
2 changes: 1 addition & 1 deletion 3rdparty/CMakeLists.txt
Expand Up @@ -373,7 +373,7 @@ add_library(3rdparty::openal ALIAS 3rdparty_openal)
add_library(3rdparty::ffmpeg ALIAS 3rdparty_ffmpeg)
add_library(3rdparty::glew ALIAS 3rdparty_glew)
add_library(3rdparty::wolfssl ALIAS wolfssl)
add_library(3rdparty::libcurl ALIAS libcurl)
add_library(3rdparty::libcurl ALIAS 3rdparty_libcurl)
add_library(3rdparty::soundtouch ALIAS soundtouch)
add_library(3rdparty::sdl2 ALIAS ${SDL2_TARGET})
add_library(3rdparty::miniupnpc ALIAS libminiupnpc-static)
Expand Down
10 changes: 6 additions & 4 deletions 3rdparty/curl/CMakeLists.txt
Expand Up @@ -3,8 +3,8 @@
if(USE_SYSTEM_CURL)
message(STATUS "RPCS3: using shared libcurl")
find_package(CURL REQUIRED)
add_library(libcurl INTERFACE)
target_link_libraries(libcurl INTERFACE CURL::libcurl)
add_library(3rdparty_libcurl INTERFACE)
target_link_libraries(3rdparty_libcurl INTERFACE CURL::libcurl)
else()
message(STATUS "RPCS3: building libcurl + wolfssl submodules")
set(BUILD_CURL_EXE OFF CACHE BOOL "Set to ON to build curl executable.")
Expand All @@ -28,9 +28,11 @@ else()

add_subdirectory(curl EXCLUDE_FROM_ALL)

target_link_libraries(libcurl PRIVATE wolfssl)
target_link_libraries(libcurl_object PRIVATE wolfssl)
if(MSVC)
target_compile_definitions(libcurl PRIVATE HAVE_SSIZE_T)
target_compile_definitions(libcurl_object PRIVATE HAVE_SSIZE_T)
endif()
add_library(3rdparty_libcurl INTERFACE)
target_link_libraries(3rdparty_libcurl INTERFACE libcurl_static)

endif()
11 changes: 11 additions & 0 deletions rpcs3/Emu/CPU/CPUTranslator.cpp
Expand Up @@ -154,6 +154,16 @@ void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngin
m_use_vnni = true;
}

// Test GFNI feature (TODO)
if (cpu == "tremont" ||
cpu == "gracemont" ||
cpu == "alderlake" ||
cpu == "raptorlake" ||
cpu == "meteorlake")
{
m_use_gfni = true;
}

// Test AVX-512_icelake features (TODO)
if (cpu == "icelake" ||
cpu == "icelake-client" ||
Expand All @@ -168,6 +178,7 @@ void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngin
m_use_avx512 = true;
m_use_avx512_icl = true;
m_use_vnni = true;
m_use_gfni = true;
}

// Aarch64 CPUs
Expand Down
3 changes: 3 additions & 0 deletions rpcs3/Emu/CPU/CPUTranslator.h
Expand Up @@ -2971,6 +2971,9 @@ class cpu_translator
// Allow VNNI
bool m_use_vnni = false;

// Allow GFNI
bool m_use_gfni = false;

// Allow Icelake tier AVX-512
bool m_use_avx512_icl = false;

Expand Down
31 changes: 30 additions & 1 deletion rpcs3/Emu/Cell/SPURecompiler.cpp
Expand Up @@ -8134,21 +8134,50 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator

// Emits the translation for the GB opcode: reads register op.ra and writes register op.rt.
// Two paths are provided: a GFNI-based one (taken when m_use_gfni is set) and a generic fallback.
void GB(spu_opcode_t op)
{
// GFNI trick to extract selected bit from bytes
// By treating the first input as constant, and the second input as variable,
// with only 1 bit set in our constant, gf2p8affineqb will extract that selected bit
// from each byte of the second operand
if (m_use_gfni)
{
// View the source register as 16 individual bytes.
const auto a = get_vr<u8[16]>(op.ra);
// Select source bytes 12, 8, 4 and 0 into the last four shuffle positions.
// NOTE(review): index 16 lies outside the 16-byte source; presumably zshuffle produces zero for it — confirm.
const auto as = zshuffle(a, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 12, 8, 4, 0);
// The constant operand carries a single 0x01 byte in each group of eight bytes; the immediate is 0.
set_vr(op.rt, gf2p8affineqb(build<u8[16]>(0x0, 0x0, 0x0, 0x0, 0x01, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x01, 0x0, 0x0, 0x0), as, 0x0));
return;
}

// Fallback: view the source as four 32-bit elements, truncate each element to a bool,
// reinterpret the four bools as one i4 value and zero-extend it to u32.
const auto a = get_vr<s32[4]>(op.ra);
const auto m = zext<u32>(bitcast<i4>(trunc<bool[4]>(a)));
// Store the gathered value in element 3 of an otherwise all-zero u32[4] vector.
set_vr(op.rt, insert(splat<u32[4]>(0), 3, eval(m)));
}

// Emits the translation for the GBH opcode: reads register op.ra and writes register op.rt.
// Same structure as GB, but operating on eight 16-bit elements instead of four 32-bit ones.
void GBH(spu_opcode_t op)
{
// GFNI path: gf2p8affineqb with a constant first operand is applied to the shuffled source bytes.
if (m_use_gfni)
{
// View the source register as 16 individual bytes.
const auto a = get_vr<u8[16]>(op.ra);
// Select the even-indexed source bytes (14, 12, ..., 2, 0) into the last eight shuffle positions.
// NOTE(review): index 16 lies outside the 16-byte source; presumably zshuffle produces zero for it — confirm.
const auto as = zshuffle(a, 16, 16, 16, 16, 16, 16, 16, 16, 14, 12, 10, 8, 6, 4, 2, 0);
// The constant operand carries a single 0x01 byte in each group of eight bytes; the immediate is 0.
set_vr(op.rt, gf2p8affineqb(build<u8[16]>(0x0, 0x0, 0x0, 0x0, 0x01, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x01, 0x0, 0x0, 0x0), as, 0x0));
return;
}

// Fallback: view the source as eight 16-bit elements, truncate each element to a bool,
// reinterpret the eight bools as one u8 value and zero-extend it to u32.
const auto a = get_vr<s16[8]>(op.ra);
const auto m = zext<u32>(bitcast<u8>(trunc<bool[8]>(a)));
// Store the gathered value in element 3 of an otherwise all-zero u32[4] vector.
set_vr(op.rt, insert(splat<u32[4]>(0), 3, eval(m)));
}

// Emits the translation for the GBB opcode: reads register op.ra and writes register op.rt.
// Operates on sixteen 8-bit elements; a GFNI path is used when m_use_gfni is set, otherwise a generic fallback.
void GBB(spu_opcode_t op)
{
// NOTE(review): the two declarations of `a` below differ only in element type (s8 vs u8) and
// cannot both exist in one scope. They look like the removed and added lines of a diff rendered
// without +/- markers — confirm which one is current (the GFNI path pairs `a` with u8[16] data).
const auto a = get_vr<s8[16]>(op.ra);
const auto a = get_vr<u8[16]>(op.ra);

if (m_use_gfni)
{
// Reverse the byte order within each 8-byte half of the source (7..0, then 15..8).
const auto as = zshuffle(a, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
// Apply gf2p8affineqb with a constant first operand holding a single 0x01 byte per 8-byte group; the immediate is 0.
const auto m = gf2p8affineqb(build<u8[16]>(0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x01, 0x01, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0), as, 0x0);
// Rearrange the result: five index-16 positions followed by bytes 0..10 of m.
// NOTE(review): index 16 lies outside the 16-byte source; presumably zshuffle produces zero for it — confirm.
set_vr(op.rt, zshuffle(m, 16, 16, 16, 16, 16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10));
return;
}

// Fallback: truncate each of the sixteen byte elements to a bool, reinterpret the
// sixteen bools as one u16 value and zero-extend it to u32.
const auto m = zext<u32>(bitcast<u16>(trunc<bool[16]>(a)));
// Store the gathered value in element 3 of an otherwise all-zero u32[4] vector.
set_vr(op.rt, insert(splat<u32[4]>(0), 3, eval(m)));
}
Expand Down

0 comments on commit 6a30dc6

Please sign in to comment.