From 8b6d615aa60212c51d1ff77ca221fd793a6cd47e Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Wed, 25 Nov 2020 15:20:59 +0300 Subject: [PATCH 1/5] atomic.hpp: remove/deprecate incomplete notify features Notification with "phantom value" could be useful in theory. As an alternative way of sending signal from the notifier. But current implementation is just useless. Also fixed slow_mutex (not used anywhere). --- rpcs3/util/atomic.cpp | 8 ++++---- rpcs3/util/atomic.hpp | 33 ++++++++------------------------- rpcs3/util/slow_mutex.hpp | 23 ++++++++++++++--------- 3 files changed, 26 insertions(+), 38 deletions(-) diff --git a/rpcs3/util/atomic.cpp b/rpcs3/util/atomic.cpp index 052e599748bd..f9484bd6d765 100644 --- a/rpcs3/util/atomic.cpp +++ b/rpcs3/util/atomic.cpp @@ -1350,7 +1350,7 @@ static u32 #ifdef _WIN32 __vectorcall #endif -alert_sema(u32 cond_id, const void* data, u64 tid, u32 size, __m128i mask, __m128i new_value) +alert_sema(u32 cond_id, const void* data, u64 tid, u32 size, __m128i mask, __m128i phantom) { verify(HERE), cond_id; @@ -1358,7 +1358,7 @@ alert_sema(u32 cond_id, const void* data, u64 tid, u32 size, __m128i mask, __m12 u32 ok = 0; - if (!size ? (!tid || cond->tid == tid) : cmp_mask(size, mask, new_value, cond->size | (cond->flag << 8), cond->mask, cond->oldv)) + if (!size ? (!tid || cond->tid == tid) : cmp_mask(size, mask, phantom, cond->size | (cond->flag << 8), cond->mask, cond->oldv)) { // Redirect if necessary const auto _old = cond; @@ -1599,7 +1599,7 @@ SAFE_BUFFERS void #ifdef _WIN32 __vectorcall #endif -atomic_wait_engine::notify_all(const void* data, u32 size, __m128i mask, __m128i new_value) +atomic_wait_engine::notify_all(const void* data, u32 size, __m128i mask) { const std::uintptr_t iptr = reinterpret_cast(data) & (~s_ref_mask >> 17); @@ -1616,7 +1616,7 @@ atomic_wait_engine::notify_all(const void* data, u32 size, __m128i mask, __m128i root_info::slot_search(iptr, size, 0, mask, [&](u32 cond_id) { - u32 res = alert_sema(cond_id, data, -1, size, mask, new_value); + u32 res = alert_sema(cond_id, data, -1, size, mask, _mm_setzero_si128()); if (res <= UINT16_MAX) { diff --git a/rpcs3/util/atomic.hpp b/rpcs3/util/atomic.hpp index b4f359802946..5035c8808bb3 100644 --- a/rpcs3/util/atomic.hpp +++ b/rpcs3/util/atomic.hpp @@ -234,7 +234,7 @@ struct atomic_wait_engine #ifdef _WIN32 __vectorcall #endif - notify_all(const void* data, u32 size, __m128i mask128, __m128i val128); + notify_all(const void* data, u32 size, __m128i mask128); public: static void set_wait_callback(bool(*cb)(const void* data, u64 attempts, u64 stamp0)); @@ -1504,18 +1504,18 @@ class atomic_t } // Notify with mask and value, allowing to not wake up thread which doesn't wait on them - void notify_one(type mask_value, type new_value) noexcept + [[deprecated("Incomplete")]] void notify_one(type mask_value, type phantom_value) noexcept { if constexpr (sizeof(T) <= 8) { const __m128i mask = _mm_cvtsi64_si128(std::bit_cast>(mask_value)); - const __m128i _new = _mm_cvtsi64_si128(std::bit_cast>(new_value)); + const __m128i _new = _mm_cvtsi64_si128(std::bit_cast>(phantom_value)); atomic_wait_engine::notify_one(&m_data, sizeof(T), mask, _new); } else if constexpr (sizeof(T) == 16) { const __m128i mask = std::bit_cast<__m128i>(mask_value); - const __m128i _new = std::bit_cast<__m128i>(new_value); + const __m128i _new = std::bit_cast<__m128i>(phantom_value); atomic_wait_engine::notify_one(&m_data, sizeof(T), mask, _new); } } @@ -1524,11 +1524,11 @@ class atomic_t { if constexpr (sizeof(T) <= 8) { - atomic_wait_engine::notify_all(&m_data, -1, _mm_cvtsi64_si128(UINT64_MAX >> ((64 - sizeof(T) * 8) & 63)), _mm_setzero_si128()); + atomic_wait_engine::notify_all(&m_data, -1, _mm_cvtsi64_si128(UINT64_MAX >> ((64 - sizeof(T) * 8) & 63))); } else if constexpr (sizeof(T) == 16) { - atomic_wait_engine::notify_all(&m_data, -1, _mm_set1_epi64x(-1), _mm_setzero_si128()); + atomic_wait_engine::notify_all(&m_data, -1, _mm_set1_epi64x(-1)); } } @@ -1538,29 +1538,12 @@ class atomic_t if constexpr (sizeof(T) <= 8) { const __m128i mask = _mm_cvtsi64_si128(std::bit_cast>(mask_value)); - atomic_wait_engine::notify_all(&m_data, -1, mask, _mm_setzero_si128()); + atomic_wait_engine::notify_all(&m_data, -1, mask); } else if constexpr (sizeof(T) == 16) { const __m128i mask = std::bit_cast<__m128i>(mask_value); - atomic_wait_engine::notify_all(&m_data, -1, mask, _mm_setzero_si128()); - } - } - - // Notify all threads with mask and value, allowing to not wake up threads which don't wait on them - void notify_all(type mask_value, type new_value) noexcept - { - if constexpr (sizeof(T) <= 8) - { - const __m128i mask = _mm_cvtsi64_si128(std::bit_cast>(mask_value)); - const __m128i _new = _mm_cvtsi64_si128(std::bit_cast>(new_value)); - atomic_wait_engine::notify_all(&m_data, sizeof(T), mask, _new); - } - else if constexpr (sizeof(T) == 16) - { - const __m128i mask = std::bit_cast<__m128i>(mask_value); - const __m128i _new = std::bit_cast<__m128i>(new_value); - atomic_wait_engine::notify_all(&m_data, sizeof(T), mask, _new); + atomic_wait_engine::notify_all(&m_data, -1, mask); } } }; diff --git a/rpcs3/util/slow_mutex.hpp b/rpcs3/util/slow_mutex.hpp index 9c5d8ba28e15..36e716863b08 100644 --- a/rpcs3/util/slow_mutex.hpp +++ b/rpcs3/util/slow_mutex.hpp @@ -18,16 +18,16 @@ class slow_mutex { const u8 prev = m_value.fetch_op([](u8& val) { - if (val == umax) [[unlikely]] + if ((val & 0x7f) == 0x7f) [[unlikely]] return; val++; }); - if (prev == umax) [[unlikely]] + if ((prev & 0x7f) == 0x7f) [[unlikely]] { // Keep trying until counter can be incremented - m_value.wait(0xff, 0x01); + m_value.wait(0x7f, 0x7f); } else if (prev == 0) { @@ -41,8 +41,9 @@ class slow_mutex } } - // Wait for 7 bits to become 0, which could only mean one thing - m_value.wait(0, 0xfe); + // Wait for signal bit + m_value.wait(0, 0x80); + m_value &= ~0x80; } bool try_lock() noexcept @@ -63,13 +64,17 @@ class slow_mutex fmt::raw_error("I tried to unlock unlocked mutex." HERE); } - // Normal notify with forced value (ignoring real waiter count) - m_value.notify_one(0xfe, 0); + // Set signal and notify + if (prev & 0x7f) + { + m_value |= 0x80; + m_value.notify_one(0x80); + } - if (prev == umax) [[unlikely]] + if ((prev & 0x7f) == 0x7f) [[unlikely]] { // Overflow notify: value can be incremented - m_value.notify_one(0x01, 0); + m_value.notify_one(0x7f); } } From bd90e3e37f48043931ecf89511a85bd3fc5a11be Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Thu, 26 Nov 2020 07:35:25 +0300 Subject: [PATCH 2/5] atomic.cpp: shrink and simplify main hashtable Reduce collision detection to 1 or 0 for now. I think it should be offloaded to notifiers. --- rpcs3/Emu/System.cpp | 16 ++---- rpcs3/util/atomic.cpp | 122 +++++++++++++----------------------------- 2 files changed, 41 insertions(+), 97 deletions(-) diff --git a/rpcs3/Emu/System.cpp b/rpcs3/Emu/System.cpp index 2575931393d0..37b42ccd4997 100644 --- a/rpcs3/Emu/System.cpp +++ b/rpcs3/Emu/System.cpp @@ -89,7 +89,7 @@ namespace namespace atomic_wait { - extern void parse_hashtable(bool(*cb)(u64 id, u16 refs, u32 ptr, u32 stats)); + extern void parse_hashtable(bool(*cb)(u64 id, u32 refs, u64 ptr, u32 stats)); } template<> @@ -1919,19 +1919,13 @@ void Emulator::Stop(bool restart) aw_colc = 0; aw_used = 0; - atomic_wait::parse_hashtable([](u64 id, u16 refs, u32 ptr, u32 stats) -> bool + atomic_wait::parse_hashtable([](u64 id, u32 refs, u64 ptr, u32 maxc) -> bool { - aw_refs += refs; + aw_refs += refs != 0; aw_used += ptr != 0; - stats = (stats & 0xaaaaaaaa) / 2 + (stats & 0x55555555); - stats = (stats & 0xcccccccc) / 4 + (stats & 0x33333333); - stats = (stats & 0xf0f0f0f0) / 16 + (stats & 0xf0f0f0f); - stats = (stats & 0xff00ff00) / 256 + (stats & 0xff00ff); - stats = (stats >> 16) + (stats & 0xffff); - - aw_colm = std::max(aw_colm, stats); - aw_colc += stats != 0; + aw_colm = std::max(aw_colm, maxc); + aw_colc += maxc != 0; return false; }); diff --git a/rpcs3/util/atomic.cpp b/rpcs3/util/atomic.cpp index f9484bd6d765..3f068f27de4b 100644 --- a/rpcs3/util/atomic.cpp +++ b/rpcs3/util/atomic.cpp @@ -21,7 +21,7 @@ #include "endian.hpp" // Total number of entries, should be a power of 2. -static constexpr std::size_t s_hashtable_size = 1u << 17; +static constexpr std::size_t s_hashtable_size = 1u << 16; // Reference counter combined with shifted pointer (which is assumed to be 47 bit) static constexpr std::uintptr_t s_ref_mask = (1u << 17) - 1; @@ -778,16 +778,19 @@ namespace { struct alignas(16) slot_allocator { - u64 ref : 16; - u64 low : 48; - u64 high; + u64 ref : 16; // Ref counter + u64 bits: 24; // Allocated bits + u64 prio: 24; // Reserved + + u64 maxc: 17; // Collision counter + u64 iptr: 47; // First pointer to use slot (to count used slots) }; // Need to spare 16 bits for ref counter - static constexpr u64 max_threads = 112; + static constexpr u64 max_threads = 24; - // (Arbitrary, not justified) Can only allow extended allocations go as far as this (about 585) - static constexpr u64 max_distance = UINT16_MAX / max_threads; + // (Arbitrary, not justified) Can only allow extended allocations go as far as this + static constexpr u64 max_distance = 500; // Thread list struct alignas(64) root_info @@ -798,12 +801,6 @@ namespace // Allocation pool, pointers to allocated semaphores atomic_t slots[max_threads]; - // For collision statistics (32 middle bits) - atomic_t first_ptr; - - // For collision statistics (bit difference stick flags) - atomic_t diff_lz, diff_tz, diff_pop; - static atomic_t* slot_alloc(std::uintptr_t ptr) noexcept; static void slot_free(std::uintptr_t ptr, atomic_t* slot, u32 tls_slot) noexcept; @@ -811,10 +808,11 @@ namespace template static auto slot_search(std::uintptr_t iptr, u32 size, u64 thread_id, __m128i mask, F func) noexcept; - void register_collisions(std::uintptr_t ptr); + // Somehow update information about collisions (TODO) + void register_collisions(std::uintptr_t ptr, u64 max_coll); }; - static_assert(sizeof(root_info) == 256); + static_assert(sizeof(root_info) == 64); } // Main hashtable for atomic wait. @@ -887,27 +885,23 @@ atomic_t* root_info::slot_alloc(std::uintptr_t ptr) noexcept return nullptr; } + if (bits.iptr == 0) + bits.iptr = ptr; + if (bits.maxc == 0 && bits.iptr != ptr && bits.ref) + bits.maxc = 1; + bits.ref++; - if (~bits.high) + if (bits.bits != (1ull << max_threads) - 1) { - const u32 id = std::countr_one(bits.high); - bits.high |= bits.high + 1; + const u32 id = std::countr_one(bits.bits); + bits.bits |= bits.bits + 1; return _this->slots + id; } - if (~bits.low << 16) - { - const u32 id = std::countr_one(bits.low); - bits.low |= bits.low + 1; - return _this->slots + 64 + id; - } - return nullptr; }); - _this->register_collisions(ptr); - if (slot) { break; @@ -918,7 +912,7 @@ atomic_t* root_info::slot_alloc(std::uintptr_t ptr) noexcept if (limit == max_distance) [[unlikely]] { - fmt::raw_error("Distance limit (585) exceeded for the atomic wait hashtable."); + fmt::raw_error("Distance limit (500) exceeded for the atomic wait hashtable."); return nullptr; } } @@ -926,44 +920,17 @@ atomic_t* root_info::slot_alloc(std::uintptr_t ptr) noexcept return slot; } -void root_info::register_collisions(std::uintptr_t ptr) +void root_info::register_collisions(std::uintptr_t ptr, u64 max_coll) { - u32 ptr32 = static_cast(ptr >> 16); - u32 first = first_ptr.load(); - - if (!first && first != ptr32) + bits.atomic_op([&](slot_allocator& bits) { - // Register first used pointer - first = first_ptr.compare_and_swap(0, ptr32); - } - - if (first && first != ptr32) - { - // Difference bits between pointers - u32 diff = first ^ ptr32; - - // The most significant different bit - u32 diff1 = std::countl_zero(diff); - - if (diff1 < 32) - { - diff_lz |= 1u << diff1; - } - - u32 diff2 = std::countr_zero(diff); - - if (diff2 < 32) - { - diff_tz |= 1u << diff2; - } - - diff = (diff & 0xaaaaaaaa) / 2 + (diff & 0x55555555); - diff = (diff & 0xcccccccc) / 4 + (diff & 0x33333333); - diff = (diff & 0xf0f0f0f0) / 16 + (diff & 0x0f0f0f0f); - diff = (diff & 0xff00ff00) / 256 + (diff & 0x00ff00ff); - - diff_pop |= 1u << static_cast((diff >> 16) + diff - 1); - } + if (bits.iptr == 0) + bits.iptr = ptr; + if (bits.maxc == 0 && bits.iptr != ptr) + bits.maxc = 1; + if (bits.maxc < max_coll) + bits.maxc = max_coll; + }); } void root_info::slot_free(std::uintptr_t iptr, atomic_t* slot, u32 tls_slot) noexcept @@ -1008,14 +975,7 @@ void root_info::slot_free(std::uintptr_t iptr, atomic_t* slot, u32 tls_slot if (_this == curr.current) { - if (diff < 64) - { - bits.high &= ~(1ull << diff); - } - else - { - bits.low &= ~(1ull << (diff - 64)); - } + bits.bits &= ~(1ull << diff); } }); @@ -1044,19 +1004,9 @@ FORCE_INLINE auto root_info::slot_search(std::uintptr_t iptr, u32 size, u64 thre u16 cond_ids[max_threads]; u32 cond_count = 0; - u64 high_val = bits.high; - u64 low_val = bits.low; - - for (u64 bits = high_val; bits; bits &= bits - 1) - { - if (u16 cond_id = _this->slots[std::countr_zero(bits)]) - { - utils::prefetch_read(s_cond_list + cond_id); - cond_ids[cond_count++] = cond_id; - } - } + u64 bits_val = bits.bits; - for (u64 bits = low_val; bits; bits &= bits - 1) + for (u64 bits = bits_val; bits; bits &= bits - 1) { if (u16 cond_id = _this->slots[std::countr_zero(bits)]) { @@ -1651,14 +1601,14 @@ atomic_wait_engine::notify_all(const void* data, u32 size, __m128i mask) namespace atomic_wait { - extern void parse_hashtable(bool(*cb)(u64 id, u16 refs, u32 ptr, u32 stats)) + extern void parse_hashtable(bool(*cb)(u64 id, u32 refs, u64 ptr, u32 max_coll)) { for (u64 i = 0; i < s_hashtable_size; i++) { const auto root = &s_hashtable[i]; const auto slot = root->bits.load(); - if (cb(i, static_cast(slot.ref), root->first_ptr.load(), root->diff_lz | root->diff_tz | root->diff_pop)) + if (cb(i, static_cast(slot.ref), slot.iptr, static_cast(slot.maxc))) { break; } From b5d498ffdafbb67cfa48d178751f8a51ee634b4f Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Thu, 26 Nov 2020 12:30:51 +0300 Subject: [PATCH 3/5] Homebrew atomic_ptr rewritten (util/shared_ptr.hpp) It's analogous to C++20 atomic std::shared_ptr The following things brought into global namespace: single_ptr shared_ptr atomic_ptr make_single --- Utilities/Config.cpp | 2 +- Utilities/Config.h | 10 +- Utilities/Thread.cpp | 4 +- Utilities/Thread.h | 8 +- rpcs3/Emu/CMakeLists.txt | 1 - rpcs3/Emu/Cell/PPUThread.cpp | 4 +- rpcs3/Emu/Cell/PPUThread.h | 2 +- rpcs3/Emu/Cell/SPUThread.cpp | 4 +- rpcs3/Emu/Cell/SPUThread.h | 2 +- rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp | 2 +- rpcs3/emucore.vcxproj | 5 +- rpcs3/emucore.vcxproj.filters | 8 +- rpcs3/util/shared_cptr.cpp | 73 --- rpcs3/util/shared_cptr.hpp | 493 ------------------ rpcs3/util/shared_ptr.hpp | 711 ++++++++++++++++++++++++++ 15 files changed, 732 insertions(+), 597 deletions(-) delete mode 100644 rpcs3/util/shared_cptr.cpp delete mode 100644 rpcs3/util/shared_cptr.hpp create mode 100644 rpcs3/util/shared_ptr.hpp diff --git a/Utilities/Config.cpp b/Utilities/Config.cpp index a5012dfe9e70..cde0ba6365ff 100644 --- a/Utilities/Config.cpp +++ b/Utilities/Config.cpp @@ -380,7 +380,7 @@ void cfg::_bool::from_default() void cfg::string::from_default() { - m_value = m_value.make(def); + m_value = def; } void cfg::set_entry::from_default() diff --git a/Utilities/Config.h b/Utilities/Config.h index 91b300adf8b1..51dfaba912fb 100644 --- a/Utilities/Config.h +++ b/Utilities/Config.h @@ -4,7 +4,7 @@ #include "Utilities/StrFmt.h" #include "util/logs.hpp" #include "util/atomic.hpp" -#include "util/shared_cptr.hpp" +#include "util/shared_ptr.hpp" #include #include @@ -393,7 +393,7 @@ namespace cfg { const std::string m_name; - stx::atomic_cptr m_value; + atomic_ptr m_value; public: std::string def; @@ -401,7 +401,7 @@ namespace cfg string(node* owner, std::string name, std::string def = {}, bool dynamic = false) : _base(type::string, owner, name, dynamic) , m_name(std::move(name)) - , m_value(m_value.make(def)) + , m_value(def) , def(std::move(def)) { } @@ -411,7 +411,7 @@ namespace cfg return *m_value.load().get(); } - std::pair> get() const + std::pair> get() const { auto v = m_value.load(); @@ -440,7 +440,7 @@ namespace cfg bool from_string(const std::string& value, bool /*dynamic*/ = false) override { - m_value = m_value.make(value); + m_value = value; return true; } }; diff --git a/Utilities/Thread.cpp b/Utilities/Thread.cpp index 1e750f66d79f..a6f5e1df6843 100644 --- a/Utilities/Thread.cpp +++ b/Utilities/Thread.cpp @@ -2242,7 +2242,7 @@ std::string thread_ctrl::get_name_cached() return {}; } - static thread_local stx::shared_cptr name_cache; + static thread_local shared_ptr name_cache; if (!_this->m_tname.is_equal(name_cache)) [[unlikely]] { @@ -2254,7 +2254,7 @@ std::string thread_ctrl::get_name_cached() thread_base::thread_base(native_entry entry, std::string_view name) : entry_point(entry) - , m_tname(stx::shared_cptr::make(name)) + , m_tname(make_single(name)) { } diff --git a/Utilities/Thread.h b/Utilities/Thread.h index 401e174fae44..cd1cce8613ca 100644 --- a/Utilities/Thread.h +++ b/Utilities/Thread.h @@ -2,7 +2,7 @@ #include "types.h" #include "util/atomic.hpp" -#include "util/shared_cptr.hpp" +#include "util/shared_ptr.hpp" #include #include @@ -110,7 +110,7 @@ class thread_base atomic_t m_sync{0}; // Thread name - stx::atomic_cptr m_tname; + atomic_ptr m_tname; // Start thread void start(); @@ -191,14 +191,14 @@ class thread_ctrl final // Set current thread name (not recommended) static void set_name(std::string_view name) { - g_tls_this_thread->m_tname.store(stx::shared_cptr::make(name)); + g_tls_this_thread->m_tname.store(make_single(name)); } // Set thread name (not recommended) template static void set_name(named_thread& thread, std::string_view name) { - static_cast(thread).m_tname.store(stx::shared_cptr::make(name)); + static_cast(thread).m_tname.store(make_single(name)); } template diff --git a/rpcs3/Emu/CMakeLists.txt b/rpcs3/Emu/CMakeLists.txt index ac68081f2a39..1a4ca66deb1e 100644 --- a/rpcs3/Emu/CMakeLists.txt +++ b/rpcs3/Emu/CMakeLists.txt @@ -34,7 +34,6 @@ target_include_directories(rpcs3_emu target_sources(rpcs3_emu PRIVATE ../util/atomic.cpp ../util/atomic2.cpp - ../util/shared_cptr.cpp ../util/fixed_typemap.cpp ../util/logs.cpp ../util/yaml.cpp diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 76425d2f58ff..b981ae98272d 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -915,7 +915,7 @@ ppu_thread::ppu_thread(const ppu_thread_params& param, std::string_view name, u3 , joiner(detached != 0 ? ppu_join_status::detached : ppu_join_status::joinable) , entry_func(param.entry) , start_time(get_guest_system_time()) - , ppu_tname(stx::shared_cptr::make(name)) + , ppu_tname(make_single(name)) { gpr[1] = stack_addr + stack_size - ppu_stack_start_offset; @@ -1020,7 +1020,7 @@ void ppu_thread::fast_call(u32 addr, u32 rtoc) { const auto _this = static_cast(get_current_cpu_thread()); - static thread_local stx::shared_cptr name_cache; + static thread_local shared_ptr name_cache; if (!_this->ppu_tname.is_equal(name_cache)) [[unlikely]] { diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index fbdfd712b102..75e3289ece8c 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -215,7 +215,7 @@ class ppu_thread : public cpu_thread const char* last_function{}; // Sticky copy of current_function, is not cleared on function return // Thread name - stx::atomic_cptr ppu_tname; + atomic_ptr ppu_tname; u64 last_ftsc = 0; u64 last_ftime = 0; diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 06405159591e..0a5716ddeb3d 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -1572,7 +1572,7 @@ void spu_thread::cpu_task() { const auto cpu = static_cast(get_current_cpu_thread()); - static thread_local stx::shared_cptr name_cache; + static thread_local shared_ptr name_cache; if (!cpu->spu_tname.is_equal(name_cache)) [[unlikely]] { @@ -1692,7 +1692,7 @@ spu_thread::spu_thread(lv2_spu_group* group, u32 index, std::string_view name, u , group(group) , option(option) , lv2_id(lv2_id) - , spu_tname(stx::shared_cptr::make(name)) + , spu_tname(make_single(name)) { if (g_cfg.core.spu_decoder == spu_decoder_type::asmjit) { diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index bf2b0d4a6ae3..c7043988763a 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -747,7 +747,7 @@ class spu_thread : public cpu_thread const u32 lv2_id; // The actual id that is used by syscalls // Thread name - stx::atomic_cptr spu_tname; + atomic_ptr spu_tname; std::unique_ptr jit; // Recompiler instance diff --git a/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp b/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp index 5fece13330a8..c19faa20e465 100644 --- a/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_ppu_thread.cpp @@ -548,7 +548,7 @@ error_code sys_ppu_thread_rename(ppu_thread& ppu, u32 thread_id, vm::cptr const auto pname = name.get_ptr(); // Make valid name - auto _name = stx::shared_cptr::make(pname, std::find(pname, pname + max_size, '\0')); + auto _name = make_single(pname, std::find(pname, pname + max_size, '\0')); // thread_ctrl name is not changed (TODO) sys_ppu_thread.warning(u8"sys_ppu_thread_rename(): Thread renamed to “%s”", *_name); diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 48564fbe96b0..a77465c1524d 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -130,9 +130,6 @@ NotUsing - - NotUsing - NotUsing @@ -778,7 +775,7 @@ - + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 3e2d6b0f482c..1614fbce78e6 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -878,9 +878,6 @@ Utilities - - Utilities - Utilities @@ -944,9 +941,6 @@ Utilities - - Utilities - Emu\Audio\FAudio @@ -1837,7 +1831,7 @@ Utilities - + Utilities diff --git a/rpcs3/util/shared_cptr.cpp b/rpcs3/util/shared_cptr.cpp deleted file mode 100644 index a3d9bc998101..000000000000 --- a/rpcs3/util/shared_cptr.cpp +++ /dev/null @@ -1,73 +0,0 @@ -#include "shared_cptr.hpp" - -#include - -stx::atomic_base::ptr_type stx::atomic_base::ref_halve() const noexcept -{ - ptr_type v = val_load(); - - while (true) - { - if (!(v & c_ref_mask)) - { - // Nullptr or depleted reference pool - return 0; - } - else if (val_compare_exchange(v, (v & ~c_ref_mask) | (v & c_ref_mask) >> 1)) - { - break; - } - } - - // Return acquired references (rounded towards zero) - return (v & ~c_ref_mask) | ((v & c_ref_mask) - ((v & c_ref_mask) >> 1) - 1); -} - -stx::atomic_base::ptr_type stx::atomic_base::ref_load() const noexcept -{ - ptr_type v = val_load(); - - while (true) - { - if (!(v & c_ref_mask)) - { - if (v == 0) - { - // Null pointer - return 0; - } - - // Busy wait - std::this_thread::yield(); - v = val_load(); - } - else if (val_compare_exchange(v, v - 1)) - { - break; - } - } - - // Obtained 1 reference from the atomic pointer - return v & ~c_ref_mask; -} - -void stx::atomic_base::ref_fix(stx::atomic_base::ptr_type& _old) const noexcept -{ - ptr_type old = _old & ~c_ref_mask; - ptr_type v = val_load(); - - while (true) - { - if ((v & ~c_ref_mask) != old || (v & c_ref_mask) == c_ref_mask) - { - // Can't return a reference to the original atomic pointer, so keep it - _old += 1; - return; - } - - if (val_compare_exchange(v, v + 1)) - { - break; - } - } -} diff --git a/rpcs3/util/shared_cptr.hpp b/rpcs3/util/shared_cptr.hpp deleted file mode 100644 index 8defb45b8aa7..000000000000 --- a/rpcs3/util/shared_cptr.hpp +++ /dev/null @@ -1,493 +0,0 @@ -#pragma once - -#include -#include -#include - -#ifdef _MSC_VER -#include -#endif - -namespace stx -{ - template - class shared_data; - - template - class unique_data; - - // Common internal layout - class atomic_base - { - public: - #if defined(__x86_64__) || defined(_M_X64) - using ptr_type = long long; - - static const long long c_ref_init = 0x10000; - static const ptr_type c_ref_mask = 0xffff; - static const ptr_type c_ptr_mask = ~0; - static const ptr_type c_ptr_shift = 16; - static const auto c_ptr_align = alignof(long long); - #else - using ptr_type = unsigned long long; - - static const long long c_ref_init = 0x10; - static const ptr_type c_ref_mask = 0xf; - static const ptr_type c_ptr_mask = ~c_ref_mask; - static const ptr_type c_ptr_shift = 0; - static const auto c_ptr_align = 16; - #endif - - protected: - // Combined borrowed refcounter and object pointer - mutable ptr_type m_val; - - template - friend class atomic_cptr; - - constexpr atomic_base() noexcept - : m_val(0) - { - } - - explicit constexpr atomic_base(ptr_type val) noexcept - : m_val(val) - { - } - - template - explicit atomic_base(shared_data* ptr) noexcept - : m_val(reinterpret_cast(ptr) << c_ptr_shift) - { - if (ptr) - { - // Fixup reference counter - m_val |= (ptr->m_ref_count - 1) & c_ref_mask; - } - } - - template - shared_data* ptr_get() const noexcept - { - return reinterpret_cast*>(val_load() >> c_ptr_shift & c_ptr_mask); - } - - ptr_type val_load() const noexcept - { - #ifdef _MSC_VER - return const_cast(m_val); - #else - return __atomic_load_n(&m_val, __ATOMIC_SEQ_CST); - #endif - } - - ptr_type val_exchange(ptr_type val) noexcept - { - #ifdef _MSC_VER - return _InterlockedExchange64(&m_val, val); - #else - return __atomic_exchange_n(&m_val, val, __ATOMIC_SEQ_CST); - #endif - } - - bool val_compare_exchange(ptr_type& expected, ptr_type val) const noexcept - { - #ifdef _MSC_VER - ptr_type x = expected; - expected = _InterlockedCompareExchange64(&m_val, val, x); - return x == expected; - #else - return __atomic_compare_exchange_n(&m_val, &expected, val, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); - #endif - } - - // Load, acquiring half of the references from the pointer - ptr_type ref_halve() const noexcept; - - // Load, actively acquiring a reference from the pointer - ptr_type ref_load() const noexcept; - - // Return acquired reference if applicable - void ref_fix(ptr_type& old) const noexcept; - }; - - // Control block with data and reference counter - template - class alignas(atomic_base::c_ptr_align) shared_data final - { - public: - // Immutable data - T m_data; - - // Main reference counter - long long m_ref_count = atomic_base::c_ref_init; - - template - explicit constexpr shared_data(Args&&... args) noexcept - : m_data(std::forward(args)...) - { - static_assert(offsetof(shared_data, m_data) == 0); - } - - // Atomic access to the ref counter - long long fetch_add(long long value) - { - #ifdef _MSC_VER - return _InterlockedExchangeAdd64(&m_ref_count, value); - #else - return __atomic_fetch_add(&m_ref_count, value, __ATOMIC_SEQ_CST); - #endif - } - }; - - // Unique ownership pointer to mutable data, suitable for conversion to shared_cptr - template - class unique_data : protected atomic_base - { - using cb = atomic_base; - - public: - constexpr unique_data() noexcept - : atomic_base() - { - } - - explicit unique_data(shared_data* data) noexcept - : atomic_base(data) - { - } - - unique_data(const unique_data&) = delete; - - unique_data(unique_data&& r) noexcept - : atomic_base(r.m_val) - { - r.m_val = 0; - } - - unique_data& operator=(const unique_data&) = delete; - - unique_data& operator=(unique_data&& r) noexcept - { - unique_data(std::move(r)).swap(*this); - return *this; - } - - ~unique_data() - { - reset(); - } - - void reset() noexcept - { - delete get(); - this->m_val = 0; - } - - void swap(unique_data& r) noexcept - { - std::swap(this->m_val, r.m_val); - } - - [[nodiscard]] shared_data* release() noexcept - { - auto result = this->ptr_get(); - this->m_val = 0; - return result; - } - - T* get() const noexcept - { - return &this->ptr_get()->m_data; - } - - T& operator*() const noexcept - { - return *get(); - } - - T* operator->() const noexcept - { - return get(); - } - - explicit operator bool() const noexcept - { - return this->m_val != 0; - } - - template - static unique_data make(Args&&... args) noexcept - { - return unique_data(new shared_data(std::forward(args)...)); - } - }; - - // Shared pointer to immutable data - template - class shared_cptr : protected atomic_base - { - using cb = atomic_base; - - protected: - using atomic_base::m_val; - - public: - constexpr shared_cptr() noexcept - : atomic_base() - { - } - - explicit shared_cptr(shared_data* data) noexcept - : atomic_base(data) - { - } - - shared_cptr(const shared_cptr& r) noexcept - : atomic_base() - { - if (const auto old_val = r.val_load()) - { - // Try to take references from the former pointer first - if (const auto new_val = r.ref_halve()) - { - this->m_val = new_val; - } - else - { - // If it fails, fallback to the control block and take max amount - this->m_val = old_val | cb::c_ref_mask; - this->ptr_get()->fetch_add(cb::c_ref_init); - } - } - } - - shared_cptr(shared_cptr&& r) noexcept - : atomic_base(r.m_val) - { - r.m_val = 0; - } - - shared_cptr(unique_data r) noexcept - : atomic_base(r.m_val) - { - r.m_val = 0; - } - - shared_cptr& operator=(const shared_cptr& r) noexcept - { - shared_cptr(r).swap(*this); - return *this; - } - - shared_cptr& operator=(shared_cptr&& r) noexcept - { - shared_cptr(std::move(r)).swap(*this); - return *this; - } - - ~shared_cptr() - { - reset(); - } - - // Set to null - void reset() noexcept - { - if (const auto pdata = this->ptr_get()) - { - // Remove references - const auto count = (cb::c_ref_mask & this->m_val) + 1; - - this->m_val = 0; - - if (pdata->fetch_add(-count) == count) - { - // Destroy if reference count became zero - delete pdata; - } - } - } - - // Possibly return reference(s) to specified shared pointer instance - void reset_hint(const shared_cptr& r) noexcept - { - // TODO - reset(); - } - - // Set to null, possibly returning a unique instance of shared data - unique_data release_unique() noexcept - { - if (const auto pdata = this->ptr_get()) - { - // Remove references - const auto count = (cb::c_ref_mask & this->m_val) + 1; - - this->m_val = 0; - - if (pdata->fetch_add(-count) == count) - { - // Return data if reference count became zero - pdata->m_ref_count = cb::c_ref_init; - return unique_data(pdata); - } - } - - return {}; - } - - void swap(shared_cptr& r) noexcept - { - std::swap(this->m_val, r.m_val); - } - - std::conditional_t get() const noexcept - { - return &this->ptr_get()->m_data; - } - - std::conditional_t operator*() const noexcept - { - return *get(); - } - - std::conditional_t operator->() const noexcept - { - return get(); - } - - explicit operator bool() const noexcept - { - return this->val_load() != 0; - } - - bool operator ==(const shared_cptr& rhs) const noexcept - { - return get() == rhs.get(); - } - - bool operator !=(const shared_cptr& rhs) const noexcept - { - return get() != rhs.get(); - } - - template - static shared_cptr make(Args&&... args) noexcept - { - return shared_cptr(new shared_data(std::forward(args)...)); - } - }; - - template - using shared_mptr = shared_cptr; - - // Atomic shared pointer to immutable data - template - class atomic_cptr : shared_cptr - { - using cb = atomic_base; - using base = shared_cptr; - - public: - constexpr atomic_cptr() noexcept - : base() - { - } - - atomic_cptr(base value) - : base(std::move(value)) - { - if (const auto diff = cb::c_ref_mask - (this->m_val & cb::c_ref_mask); this->m_val && diff) - { - // Obtain max amount of references - this->template ptr_get()->fetch_add(diff); - this->m_val |= cb::c_ref_mask; - } - } - - atomic_cptr(const atomic_cptr&) = delete; - - atomic_cptr& operator=(const atomic_cptr&) = delete; - - atomic_cptr& operator=(base value) noexcept - { - exchange(std::move(value)); - return *this; - } - - void store(base value) noexcept - { - exchange(std::move(value)); - } - - base load() const noexcept - { - base result; - static_cast(result).m_val = this->ref_load(); - - if (result) - { - // TODO: obtain max-1 and try to return as much as possible - this->template ptr_get()->fetch_add(1); - this->ref_fix(static_cast(result).m_val); - } - - return result; - } - - operator base() const noexcept - { - return load(); - } - - base exchange(base value) noexcept - { - static_cast(value).m_val = this->val_exchange(static_cast(value).m_val); - return value; - } - - // Simple atomic load is much more effective than load(), but it's a non-owning reference - const void* observe() const noexcept - { - return this->get(); - } - - explicit operator bool() const noexcept - { - return observe() != nullptr; - } - - bool is_equal(const base& r) const noexcept - { - return observe() == r.get(); - } - - // bool compare_and_swap_test_weak(const base& expected, base value) noexcept - // { - - // } - - // bool compare_and_swap_test(const base& expected, base value) noexcept - // { - - // } - - // bool compare_exchange_weak(base& expected, base value) noexcept - // { - // // TODO - // } - - // bool compare_exchange(base& expected, base value) noexcept - // { - // // TODO - // } - - // void atomic_op(); - - using base::make; - }; - - template - using atomic_mptr = atomic_cptr; -} diff --git a/rpcs3/util/shared_ptr.hpp b/rpcs3/util/shared_ptr.hpp new file mode 100644 index 000000000000..acb855347f69 --- /dev/null +++ b/rpcs3/util/shared_ptr.hpp @@ -0,0 +1,711 @@ +#pragma once + +#include +#include +#include "atomic.hpp" + +namespace stx +{ + // TODO + template + constexpr bool is_same_ptr_v = true; + + template + constexpr bool is_same_ptr_cast_v = std::is_convertible_v && is_same_ptr_v; + + template + class single_ptr; + + template + class shared_ptr; + + template + class atomic_ptr; + + // Basic assumption of userspace pointer size + constexpr uint c_ptr_size = 47; + + // Use lower 17 bits as atomic_ptr internal refcounter (pointer is shifted) + constexpr uint c_ref_mask = 0x1ffff, c_ref_size = 17; + + struct shared_counter + { + // Stored destructor + void (*destroy)(void* ptr); + + // Reference counter + atomic_t refs{0}; + }; + + template + class unique_data + { + public: + T data; + + template + explicit constexpr unique_data(Args&&... args) noexcept + : data(std::forward(args)...) + { + } + }; + + template + class unique_data + { + std::size_t count; + }; + + // Control block with data and reference counter + template + class alignas(T) shared_data final : public shared_counter, public unique_data + { + public: + using data_type = T; + + template + explicit constexpr shared_data(Args&&... args) noexcept + : shared_counter{} + , unique_data(std::forward(args)...) + { + } + }; + + template + class alignas(T) shared_data final : public shared_counter, public unique_data + { + public: + using data_type = T; + }; + + // Simplified unique pointer (well, not simplified, std::unique_ptr is preferred) + template + class single_ptr + { + std::remove_extent_t* m_ptr{}; + + shared_data* d() const noexcept + { + // Shared counter, deleter, should be at negative offset + return std::launder(static_cast*>(reinterpret_cast*>(m_ptr))); + } + + template + friend class shared_ptr; + + template + friend class atomic_ptr; + + public: + using pointer = T*; + + using element_type = std::remove_extent_t; + + constexpr single_ptr() noexcept = default; + + constexpr single_ptr(std::nullptr_t) noexcept {} + + single_ptr(const single_ptr&) = delete; + + single_ptr(single_ptr&& r) noexcept + : m_ptr(r.m_ptr) + { + r.m_ptr = nullptr; + } + + template >> + single_ptr(single_ptr&& r) noexcept + : m_ptr(r.m_ptr) + { + r.m_ptr = nullptr; + } + + ~single_ptr() + { + reset(); + } + + single_ptr& operator=(const single_ptr&) = delete; + + single_ptr& operator=(std::nullptr_t) noexcept + { + reset(); + } + + single_ptr& operator=(single_ptr&& r) noexcept + { + m_ptr = r.m_ptr; + r.m_ptr = nullptr; + return *this; + } + + template >> + single_ptr& operator=(single_ptr&& r) noexcept + { + m_ptr = r.m_ptr; + r.m_ptr = nullptr; + return *this; + } + + void reset() noexcept + { + if (m_ptr) [[likely]] + { + d()->destroy(d()); + m_ptr = nullptr; + } + } + + void swap(single_ptr& r) noexcept + { + std::swap(m_ptr, r.m_ptr); + } + + element_type* get() const noexcept + { + return m_ptr; + } + + decltype(auto) operator*() const noexcept + { + if constexpr (std::is_void_v) + { + return; + } + else + { + return *m_ptr; + } + } + + element_type* operator->() const noexcept + { + return m_ptr; + } + + decltype(auto) operator[](std::ptrdiff_t idx) const noexcept + { + if constexpr (std::is_void_v) + { + return; + } + else if constexpr (std::is_array_v) + { + return m_ptr[idx]; + } + else + { + return *m_ptr; + } + } + + explicit constexpr operator bool() const noexcept + { + return m_ptr != nullptr; + } + }; + + template + static std::enable_if_t) && (Init || !sizeof...(Args)), single_ptr> make_single(Args&&... args) noexcept + { + shared_data* ptr = nullptr; + + if constexpr (Init) + { + ptr = new shared_data(std::forward(args)...); + } + else + { + ptr = new shared_data; + } + + ptr->destroy = [](void* p) + { + delete static_cast*>(p); + }; + + single_ptr r; + reinterpret_cast*&>(r) = &ptr->data; + return r; + } + + template + static std::enable_if_t, single_ptr> make_single(std::size_t count) noexcept + { + const std::size_t size = sizeof(shared_data) + count * sizeof(std::remove_extent_t); + + std::byte* bytes = nullptr; + + if constexpr (alignof(std::remove_extent_t) > (__STDCPP_DEFAULT_NEW_ALIGNMENT__)) + { + bytes = new (std::align_val_t{alignof(std::remove_extent_t)}) std::byte[size]; + } + else + { + bytes = new std::byte[size]; + } + + // Initialize control block + shared_data* ptr = new (reinterpret_cast*>(bytes)) shared_data(); + + // Initialize array next to the control block + T arr = reinterpret_cast(ptr + 1); + + if constexpr (Init) + { + std::uninitialized_value_construct_n(arr, count); + } + else + { + std::uninitialized_default_construct_n(arr, count); + } + + ptr->m_count = count; + + ptr->destroy = [](void* p) + { + shared_data* ptr = static_cast*>(p); + + std::destroy_n(std::launder(reinterpret_cast(ptr + 1)), ptr->m_count); + + ptr->~shared_data(); + + if constexpr (alignof(std::remove_extent_t) > (__STDCPP_DEFAULT_NEW_ALIGNMENT__)) + { + ::operator delete[](reinterpret_cast(p), std::align_val_t{alignof(std::remove_extent_t)}); + } + else + { + delete[] reinterpret_cast(p); + } + }; + + single_ptr r; + reinterpret_cast*&>(r) = std::launder(arr); + return r; + } + + // Simplified shared pointer + template + class shared_ptr + { + std::remove_extent_t* m_ptr{}; + + shared_data* d() const noexcept + { + // Shared counter, deleter, should be at negative offset + return std::launder(static_cast*>(reinterpret_cast*>(m_ptr))); + } + + template + friend class atomic_ptr; + + public: + using pointer = T*; + + using element_type = std::remove_extent_t; + + constexpr shared_ptr() noexcept = default; + + constexpr shared_ptr(std::nullptr_t) noexcept {} + + shared_ptr(const shared_ptr& r) noexcept + : m_ptr(r.m_ptr) + { + if (m_ptr) + d()->refs++; + } + + template >> + shared_ptr(const shared_ptr& r) noexcept + : m_ptr(r.m_ptr) + { + if (m_ptr) + d()->refs++; + } + + shared_ptr(shared_ptr&& r) noexcept + : m_ptr(r.m_ptr) + { + r.m_ptr = nullptr; + } + + template >> + shared_ptr(shared_ptr&& r) noexcept + : m_ptr(r.m_ptr) + { + r.m_ptr = nullptr; + } + + template >> + shared_ptr(single_ptr&& r) noexcept + : m_ptr(r.m_ptr) + { + r.m_ptr = nullptr; + } + + ~shared_ptr() + { + reset(); + } + + shared_ptr& operator=(const shared_ptr& r) noexcept + { + shared_ptr(r).swap(*this); + return *this; + } + + template >> + shared_ptr& operator=(const shared_ptr& r) noexcept + { + shared_ptr(r).swap(*this); + return *this; + } + + shared_ptr& operator=(shared_ptr&& r) noexcept + { + shared_ptr(std::move(r)).swap(*this); + return *this; + } + + template >> + shared_ptr& operator=(shared_ptr&& r) noexcept + { + shared_ptr(std::move(r)).swap(*this); + return *this; + } + + template >> + shared_ptr& operator=(single_ptr&& r) noexcept + { + shared_ptr(std::move(r)).swap(*this); + return *this; + } + + // Set to null + void reset() noexcept + { + if (m_ptr && !--d()->refs) [[unlikely]] + { + d()->destroy(d()); + m_ptr = nullptr; + } + } + + // Converts to unique (single) ptr if reference is 1, otherwise returns null. Nullifies self. + explicit operator single_ptr() && noexcept + { + if (m_ptr && !--d()->refs) + { + d()->refs.release(1); + return {std::move(*this)}; + } + + m_ptr = nullptr; + return {}; + } + + void swap(shared_ptr& r) noexcept + { + std::swap(this->m_ptr, r.m_ptr); + } + + element_type* get() const noexcept + { + return m_ptr; + } + + decltype(auto) operator*() const noexcept + { + if constexpr (std::is_void_v) + { + return; + } + else + { + return *m_ptr; + } + } + + element_type* operator->() const noexcept + { + return m_ptr; + } + + decltype(auto) operator[](std::ptrdiff_t idx) const noexcept + { + if constexpr (std::is_void_v) + { + return; + } + else if constexpr (std::is_array_v) + { + return m_ptr[idx]; + } + else + { + return *m_ptr; + } + } + + std::size_t use_count() const noexcept + { + if (m_ptr) + { + return d()->refs; + } + else + { + return 0; + } + } + + explicit constexpr operator bool() const noexcept + { + return m_ptr != nullptr; + } + + template (std::declval())), typename = std::enable_if_t>> + explicit operator shared_ptr() const noexcept + { + if (m_ptr) + { + d()->refs++; + } + + shared_ptr r; + r.m_ptr = m_ptr; + return r; + } + }; + + template + static std::enable_if_t && (!Init || !sizeof...(Args)), shared_ptr> make_shared(Args&&... args) noexcept + { + return make_single(std::forward(args)...); + } + + template + static std::enable_if_t, shared_ptr> make_shared(std::size_t count) noexcept + { + return make_single(count); + } + + // Atomic simplified shared pointer + template + class atomic_ptr + { + mutable atomic_t m_val{0}; + + static shared_data* d(uptr val) + { + return std::launder(static_cast*>(reinterpret_cast*>(val >> c_ref_size))); + } + + shared_data* d() const noexcept + { + return d(m_val); + } + + public: + using pointer = T*; + + using element_type = std::remove_extent_t; + + using shared_type = shared_ptr; + + constexpr atomic_ptr() noexcept = default; + + constexpr atomic_ptr(std::nullptr_t) noexcept {} + + explicit atomic_ptr(T value) noexcept + { + auto r = make_single(std::move(value)); + m_val = reinterpret_cast(std::exchange(r.m_ptr, nullptr)) << c_ref_size; + d()->refs += c_ref_mask; + } + + template >> + atomic_ptr(const shared_ptr& r) noexcept + : m_val(reinterpret_cast(r.m_ptr) << c_ref_size) + { + // Obtain a ref + as many refs as an atomic_ptr can additionally reference + if (m_val) + d()->refs += c_ref_mask + 1; + } + + template >> + atomic_ptr(shared_ptr&& r) noexcept + : m_val(reinterpret_cast(r.m_ptr) << c_ref_size) + { + r.m_ptr = nullptr; + + if (m_val) + d()->refs += c_ref_mask; + } + + template >> + atomic_ptr(single_ptr&& r) noexcept + : m_val(reinterpret_cast(r.m_ptr) << c_ref_size) + { + r.m_ptr = nullptr; + + if (m_val) + d()->refs += c_ref_mask; + } + + ~atomic_ptr() + { + const uptr v = m_val.raw(); + + if (v && !d(v)->refs.sub_fetch(c_ref_mask + 1 - (v & c_ref_mask))) + { + d(v)->destroy(d(v)); + } + } + + atomic_ptr& operator=(T value) noexcept + { + // TODO: does it make sense? + store(make_single(std::move(value))); + return *this; + } + + template >> + atomic_ptr& operator=(const shared_ptr& r) noexcept + { + store(r); + return *this; + } + + template >> + atomic_ptr& operator=(shared_ptr&& r) noexcept + { + store(std::move(r)); + return *this; + } + + template >> + atomic_ptr& operator=(single_ptr&& r) noexcept + { + store(std::move(r)); + return *this; + } + + shared_type load() const noexcept + { + shared_type r; + + // Add reference + const auto [prev, did_ref] = m_val.fetch_op([](uptr& val) + { + if (val >> c_ref_size) + { + val++; + return true; + } + + return false; + }); + + if (!did_ref) + { + // Null pointer + return r; + } + + // Set referenced pointer + r.m_ptr = std::launder(reinterpret_cast(prev >> c_ref_size)); + + // Dereference if same pointer + m_val.fetch_op([prev = prev](uptr& val) + { + if (val >> c_ref_size == prev >> c_ref_size) + { + val--; + return true; + } + + return false; + }); + + return r; + } + + void store(T value) noexcept + { + store(make_single(std::move(value))); + } + + void store(shared_type value) noexcept + { + if (value.m_ptr) + { + // Consume value and add refs + value.d()->refs += c_ref_mask; + } + + atomic_ptr old; + old.m_val.raw() = m_val.exchange(reinterpret_cast(std::exchange(value.m_ptr, nullptr)) << c_ref_size); + } + + [[nodiscard]] shared_type exchange(shared_type value) noexcept + { + atomic_ptr old; + + if (value.m_ptr) + { + // Consume value and add refs + value.d()->refs += c_ref_mask; + old.m_val.raw() += 1; + } + + old.m_val.raw() += m_val.exchange(reinterpret_cast(std::exchange(value.m_ptr, nullptr)) << c_ref_size); + + shared_type r; + r.m_ptr = old.m_val >> c_ref_size; + return r; + } + + // Simple atomic load is much more effective than load(), but it's a non-owning reference + const volatile void* observe() const noexcept + { + return reinterpret_cast(m_val >> c_ref_size); + } + + explicit constexpr operator bool() const noexcept + { + return m_val != 0; + } + + bool is_equal(const shared_ptr& r) const noexcept + { + return observe() == r.get(); + } + + bool is_equal(const single_ptr& r) const noexcept + { + return observe() == r.get(); + } + }; +} + +namespace std +{ + template + void swap(stx::single_ptr& lhs, stx::single_ptr& rhs) noexcept + { + lhs.swap(rhs); + } + + template + void swap(stx::shared_ptr& lhs, stx::shared_ptr& rhs) noexcept + { + lhs.swap(rhs); + } +} + +using stx::single_ptr; +using stx::shared_ptr; +using stx::atomic_ptr; +using stx::make_single; From 22a24446c1f3c0323f1f0155d964d4484a37dbb3 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Thu, 26 Nov 2020 16:30:17 +0300 Subject: [PATCH 4/5] Threads: fix minor race Has already been in "fixed" state in past... --- Utilities/Thread.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Utilities/Thread.cpp b/Utilities/Thread.cpp index a6f5e1df6843..64b5e73a880d 100644 --- a/Utilities/Thread.cpp +++ b/Utilities/Thread.cpp @@ -2010,7 +2010,10 @@ u64 thread_base::finalize(thread_state result_state) noexcept atomic_wait_engine::set_wait_callback(nullptr); - // Return true if need to delete thread object (no) + // Avoid race with the destructor + const u64 _self = m_thread; + + // Set result state (errored or finalized) const bool ok = 0 == (3 & ~m_sync.fetch_op([&](u64& v) { v &= -4; @@ -2020,8 +2023,7 @@ u64 thread_base::finalize(thread_state result_state) noexcept // Signal waiting threads m_sync.notify_all(2); - // No detached thread supported atm - return m_thread; + return _self; } thread_base::native_entry thread_base::finalize(u64 _self) noexcept From 089275899410a6c65e855f1671879814ab05938e Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Thu, 26 Nov 2020 08:33:50 +0300 Subject: [PATCH 5/5] Minor debugging enhancement Use raise(SIGTRAP) in failed SIGSEGV handling and return. --- Utilities/Thread.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Utilities/Thread.cpp b/Utilities/Thread.cpp index 64b5e73a880d..106322d5bc17 100644 --- a/Utilities/Thread.cpp +++ b/Utilities/Thread.cpp @@ -1786,7 +1786,9 @@ static void signal_handler(int sig, siginfo_t* info, void* uct) noexcept if (IsDebuggerPresent()) { - __asm("int3;"); + // Convert to SIGTRAP + raise(SIGTRAP); + return; } report_fatal_error(msg);