Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make SPU recompilers lock-free #6879

Merged
merged 5 commits into from
Oct 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
42 changes: 25 additions & 17 deletions Utilities/JIT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,8 @@
#include "VirtualMemory.h"
#include <immintrin.h>

// Memory manager mutex
shared_mutex s_mutex2;

#ifdef __linux__
#include <sys/mman.h>
#define CAN_OVERCOMMIT
#endif

Expand Down Expand Up @@ -50,30 +48,32 @@ static u8* add_jit_memory(std::size_t size, uint align)
return pointer;
}

#ifndef CAN_OVERCOMMIT
std::lock_guard lock(s_mutex2);
#endif

u64 olda, newa;

// Simple allocation by incrementing pointer to the next free data
const u64 pos = Ctr.atomic_op([&](u64& ctr) -> u64
{
const u64 _pos = ::align(ctr, align);
const u64 _pos = ::align(ctr & 0xffff'ffff, align);
const u64 _new = ::align(_pos + size, align);

if (UNLIKELY(_new > 0x40000000))
{
// Sorry, we failed, and further attempts should fail too.
ctr = 0x40000000;
ctr |= 0x40000000;
return -1;
}

// Last allocation is stored in highest bits
olda = ctr >> 32;
newa = olda;

// Check the necessity to commit more memory
olda = ::align(ctr, 0x10000);
newa = ::align(_new, 0x10000);
if (UNLIKELY(_new > olda))
{
newa = ::align(_new, 0x100000);
}

ctr = _new;
ctr += _new - (ctr & 0xffff'ffff);
return _pos;
});

Expand All @@ -86,11 +86,19 @@ static u8* add_jit_memory(std::size_t size, uint align)
if (UNLIKELY(olda != newa))
{
#ifdef CAN_OVERCOMMIT
// TODO: possibly madvise
madvise(pointer + olda, newa - olda, MADV_WILLNEED);
#else
// Commit more memory
utils::memory_commit(pointer + olda, newa - olda, Prot);
#endif
// Acknowledge committed memory
Ctr.atomic_op([&](u64& ctr)
{
if ((ctr >> 32) < newa)
{
ctr += (newa - (ctr >> 32)) << 32;
}
});
}

return pointer + pos;
Expand Down Expand Up @@ -159,10 +167,10 @@ void jit_runtime::initialize()
}

// Create code/data snapshot
s_code_init.resize(s_code_pos);
std::memcpy(s_code_init.data(), alloc(0, 0, true), s_code_pos);
s_data_init.resize(s_data_pos);
std::memcpy(s_data_init.data(), alloc(0, 0, false), s_data_pos);
s_code_init.resize(s_code_pos & 0xffff'ffff);
std::memcpy(s_code_init.data(), alloc(0, 0, true), s_code_init.size());
s_data_init.resize(s_data_pos & 0xffff'ffff);
std::memcpy(s_data_init.data(), alloc(0, 0, false), s_data_init.size());
}

void jit_runtime::finalize() noexcept
Expand Down
107 changes: 91 additions & 16 deletions Utilities/lockless.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,9 @@ class lf_queue_item final
template <typename U>
friend class lf_queue;

template <typename U>
friend class lf_bunch;

constexpr lf_queue_item() = default;

template <typename... Args>
Expand Down Expand Up @@ -195,6 +198,9 @@ class lf_queue_iterator
template <typename U>
friend class lf_queue_slice;

template <typename U>
friend class lf_bunch;

public:
constexpr lf_queue_iterator() = default;

Expand Down Expand Up @@ -311,22 +317,16 @@ class lf_queue_slice
}
};

class lf_queue_base
{
protected:
atomic_t<std::uintptr_t> m_head = 0;
};

// Linked list-based multi-producer queue (the consumer drains the whole queue at once)
template <typename T>
class lf_queue : public lf_queue_base
class lf_queue final
{
using lf_queue_base::m_head;
atomic_t<lf_queue_item<T>*> m_head{nullptr};

// Extract all elements and reverse element order (FILO to FIFO)
lf_queue_item<T>* reverse() noexcept
{
if (auto* head = m_head.load() ? reinterpret_cast<lf_queue_item<T>*>(m_head.exchange(0)) : nullptr)
if (auto* head = m_head.load() ? m_head.exchange(nullptr) : nullptr)
{
if (auto* prev = head->m_link)
{
Expand All @@ -352,26 +352,26 @@ class lf_queue : public lf_queue_base

~lf_queue()
{
delete reinterpret_cast<lf_queue_item<T>*>(m_head.load());
delete m_head.load();
}

void wait() noexcept
{
if (m_head == 0)
if (m_head == nullptr)
{
m_head.wait(0);
m_head.wait(nullptr);
}
}

template <typename... Args>
void push(Args&&... args)
{
auto _old = m_head.load();
auto* item = new lf_queue_item<T>(reinterpret_cast<lf_queue_item<T>*>(_old), std::forward<Args>(args)...);
auto _old = m_head.load();
auto item = new lf_queue_item<T>(_old, std::forward<Args>(args)...);

while (!m_head.compare_exchange(_old, reinterpret_cast<std::uint64_t>(item)))
while (!m_head.compare_exchange(_old, item))
{
item->m_link = reinterpret_cast<lf_queue_item<T>*>(_old);
item->m_link = _old;
}

if (!_old)
Expand Down Expand Up @@ -462,6 +462,81 @@ class lf_queue : public lf_queue_base
}
};

// Concurrent linked list, elements remain until destroyed.
template <typename T>
class lf_bunch final
{
atomic_t<lf_queue_item<T>*> m_head{nullptr};

public:
constexpr lf_bunch() noexcept = default;

~lf_bunch()
{
delete m_head.load();
}

// Add unconditionally
template <typename... Args>
T* push(Args&&... args) noexcept
{
auto _old = m_head.load();
auto item = new lf_queue_item<T>(_old, std::forward<Args>(args)...);

while (!m_head.compare_exchange(_old, item))
{
item->m_link = _old;
}

return &item->m_data;
}

// Add if pred(item, all_items) is true for all existing items
template <typename F, typename... Args>
T* push_if(F pred, Args&&... args) noexcept
{
auto _old = m_head.load();
auto _chk = _old;
auto item = new lf_queue_item<T>(_old, std::forward<Args>(args)...);

_chk = nullptr;

do
{
item->m_link = _old;

// Check all items in the queue
for (auto ptr = _old; ptr != _chk; ptr = ptr->m_link)
{
if (!pred(item->m_data, ptr->m_data))
{
item->m_link = nullptr;
delete item;
return nullptr;
}
}

// Set to not check already checked items
_chk = _old;
}
while (!m_head.compare_exchange(_old, item));

return &item->m_data;
}

lf_queue_iterator<T> begin() const
{
lf_queue_iterator<T> result;
result.m_ptr = m_head.load();
return result;
}

lf_queue_iterator<T> end() const
{
return {};
}
};

// Assignable lock-free thread-safe value of any type (memory-inefficient)
template <typename T>
class lf_value final
Expand Down
5 changes: 2 additions & 3 deletions rpcs3/Emu/CPU/CPUThread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ void fmt_class_string<cpu_flag>::format(std::string& out, u64 arg)
case cpu_flag::ret: return "ret";
case cpu_flag::signal: return "sig";
case cpu_flag::memory: return "mem";
case cpu_flag::jit_return: return "JIT";
case cpu_flag::dbg_global_pause: return "G-PAUSE";
case cpu_flag::dbg_global_stop: return "G-EXIT";
case cpu_flag::dbg_pause: return "PAUSE";
Expand Down Expand Up @@ -423,7 +422,7 @@ bool cpu_thread::check_state() noexcept
state -= cpu_flag::memory;
}

if (state & (cpu_flag::exit + cpu_flag::jit_return + cpu_flag::dbg_global_stop))
if (state & (cpu_flag::exit + cpu_flag::dbg_global_stop))
{
state += cpu_flag::wait;
return true;
Expand All @@ -432,7 +431,7 @@ bool cpu_thread::check_state() noexcept
const auto [state0, escape] = state.fetch_op([&](bs_t<cpu_flag>& flags)
{
// Atomically clean wait flag and escape
if (!(flags & (cpu_flag::exit + cpu_flag::jit_return + cpu_flag::dbg_global_stop + cpu_flag::ret + cpu_flag::stop)))
if (!(flags & (cpu_flag::exit + cpu_flag::dbg_global_stop + cpu_flag::ret + cpu_flag::stop)))
{
// Check pause flags which hold thread inside check_state
if (flags & (cpu_flag::pause + cpu_flag::suspend + cpu_flag::dbg_global_pause + cpu_flag::dbg_pause))
Expand Down
3 changes: 1 addition & 2 deletions rpcs3/Emu/CPU/CPUThread.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ enum class cpu_flag : u32
signal, // Thread received a signal (HLE)
memory, // Thread must unlock memory mutex

jit_return, // JIT compiler event (forced return)
dbg_global_pause, // Emulation paused
dbg_global_stop, // Emulation stopped
dbg_pause, // Thread paused
Expand Down Expand Up @@ -66,7 +65,7 @@ class cpu_thread
// Test stopped state
bool is_stopped() const
{
return !!(state & (cpu_flag::stop + cpu_flag::exit + cpu_flag::jit_return + cpu_flag::dbg_global_stop));
return !!(state & (cpu_flag::stop + cpu_flag::exit + cpu_flag::dbg_global_stop));
}

// Test paused state
Expand Down
36 changes: 22 additions & 14 deletions rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,24 +45,23 @@ void spu_recompiler::init()
}
}

spu_function_t spu_recompiler::compile(u64 last_reset_count, const std::vector<u32>& func, void* fn_location)
spu_function_t spu_recompiler::compile(std::vector<u32>&& _func)
{
if (!fn_location)
{
fn_location = m_spurt->find(last_reset_count, func);
}
const auto add_loc = m_spurt->add_empty(std::move(_func));

if (fn_location == spu_runtime::g_dispatcher)
if (!add_loc)
{
return &dispatch;
return nullptr;
}

if (!fn_location)
if (add_loc->compiled)
{
return nullptr;
return add_loc->compiled;
}

if (auto cache = g_fxo->get<spu_cache>(); cache && g_cfg.core.spu_cache)
const std::vector<u32>& func = add_loc->data;

if (auto cache = g_fxo->get<spu_cache>(); cache && g_cfg.core.spu_cache && !add_loc->cached.exchange(1))
{
cache->add(func);
}
Expand Down Expand Up @@ -94,10 +93,10 @@ spu_function_t spu_recompiler::compile(u64 last_reset_count, const std::vector<u
X86Assembler compiler(&code);
this->c = &compiler;

if (g_cfg.core.spu_debug)
if (g_cfg.core.spu_debug && !add_loc->logged.exchange(1))
{
// Dump analyser data
this->dump(log);
this->dump(func, log);
fs::file(m_spurt->get_cache_path() + "spu.log", fs::write + fs::append).write(log);

// Set logger
Expand Down Expand Up @@ -892,12 +891,21 @@ spu_function_t spu_recompiler::compile(u64 last_reset_count, const std::vector<u
LOG_FATAL(SPU, "Failed to build a function");
}

if (!m_spurt->add(last_reset_count, fn_location, fn))
// Install compiled function pointer
const bool added = !add_loc->compiled && add_loc->compiled.compare_and_swap_test(nullptr, fn);

// Rebuild trampoline if necessary
if (!m_spurt->rebuild_ubertrampoline(func[1]))
{
return nullptr;
}

if (g_cfg.core.spu_debug)
if (added)
{
add_loc->compiled.notify_all();
}

if (g_cfg.core.spu_debug && added)
{
// Add ASMJIT logs
fmt::append(log, "Address: %p\n\n", fn);
Expand Down
2 changes: 1 addition & 1 deletion rpcs3/Emu/Cell/SPUASMJITRecompiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class spu_recompiler : public spu_recompiler_base

virtual void init() override;

virtual spu_function_t compile(u64 last_reset_count, const std::vector<u32>&, void*) override;
virtual spu_function_t compile(std::vector<u32>&&) override;

private:
// ASMJIT runtime
Expand Down