diff --git a/Utilities/cond.cpp b/Utilities/cond.cpp
index 718ce5cb30d2..c806549b3663 100644
--- a/Utilities/cond.cpp
+++ b/Utilities/cond.cpp
@@ -273,6 +273,159 @@ void shared_cond::imp_notify() noexcept
 	balanced_awaken(m_cvx32, utils::popcnt32(wait_mask));
 }
 
+void shared_cond::wait_all() noexcept
+{
+	// Try to acquire waiting state without locking but only if there are other locks
+	const auto [old_, result] = m_cvx32.fetch_op([](u64& cvx32) -> u64
+	{
+		// Check waiting alone
+		if ((cvx32 & 0xffffffff) == 0)
+		{
+			return 0;
+		}
+
+		// Combine used bits and invert to find least significant bit unused
+		const u32 slot = utils::cnttz64(~((cvx32 & 0xffffffff) | (cvx32 >> 32)), true);
+
+		// Set waiting bit (does nothing if all slots are used)
+		cvx32 |= (1ull << slot) & 0xffffffff;
+		return 1ull << slot;
+	});
+
+	if (!result)
+	{
+		return;
+	}
+
+	if (result > 0xffffffffu)
+	{
+		// All slots are used, fallback to spin wait
+		while (m_cvx32 & 0xffffffff)
+		{
+			busy_wait();
+		}
+
+		return;
+	}
+
+	const u64 wait_bit = result;
+	const u64 lock_bit = wait_bit | (wait_bit << 32);
+
+	balanced_wait_until(m_cvx32, -1, [&](u64& cvx32, auto... ret) -> int
+	{
+		if ((cvx32 & wait_bit) == 0)
+		{
+			// Remove signal and unlock at once
+			cvx32 &= ~lock_bit;
+			return +1;
+		}
+
+		if constexpr (sizeof...(ret))
+		{
+			cvx32 &= ~lock_bit;
+			return -1;
+		}
+
+		return 0;
+	});
+}
+
+bool shared_cond::wait_all(shared_cond::shared_lock& lock) noexcept
+{
+	AUDIT(lock.m_this == this);
+
+	if (lock.m_slot >= 32)
+	{
+		// Invalid argument, assume notified
+		return true;
+	}
+
+	const u64 wait_bit = c_wait << lock.m_slot;
+	const u64 lock_bit = c_lock << lock.m_slot;
+
+	// Try to acquire waiting state only if there are other locks
+	const auto [old_, not_alone] = m_cvx32.fetch_op([&](u64& cvx32)
+	{
+		// Check locking alone
+		if ((cvx32 >> 32) == (lock_bit >> 32))
+		{
+			return false;
+		}
+
+		// c_lock -> c_wait, c_sig -> unlock
+		cvx32 &= ~(lock_bit & ~wait_bit);
+		return true;
+	});
+
+	if (!not_alone)
+	{
+		return false;
+	}
+	else
+	{
+		// Set invalid slot to acknowledge unlocking
+		lock.m_slot = 33;
+	}
+
+	if ((old_ & wait_bit) == 0)
+	{
+		// Already signaled, return without waiting
+		return true;
+	}
+
+	balanced_wait_until(m_cvx32, -1, [&](u64& cvx32, auto... ret) -> int
+	{
+		if ((cvx32 & wait_bit) == 0)
+		{
+			// Remove signal and unlock at once
+			cvx32 &= ~lock_bit;
+			return +1;
+		}
+
+		if constexpr (sizeof...(ret))
+		{
+			cvx32 &= ~lock_bit;
+			return -1;
+		}
+
+		return 0;
+	});
+
+	return true;
+}
+
+void shared_cond::notify_all(shared_cond::shared_lock& lock) noexcept
+{
+	AUDIT(lock.m_this == this);
+
+	const u64 slot_mask = c_sig << lock.m_slot;
+
+	auto [old, ok] = m_cvx32.fetch_op([&](u64& cvx32)
+	{
+		if (const u64 sig_mask = cvx32 & 0xffffffff)
+		{
+			cvx32 &= (0xffffffffull << 32) & ~slot_mask;
+			cvx32 |= (sig_mask << 32) & ~slot_mask;
+			return true;
+		}
+
+		return false;
+	});
+
+	// Set invalid slot to acknowledge unlocking
+	lock.m_slot = 34;
+
+	// Determine if some waiters need a syscall notification
+	const u64 wait_mask = old & (~old >> 32);
+
+	if (UNLIKELY(!ok || !wait_mask))
+	{
+		return;
+	}
+
+	balanced_awaken(m_cvx32, utils::popcnt32(wait_mask));
+}
+
 bool lf_queue_base::wait(u64 _timeout)
 {
 	auto _old = m_head.compare_and_swap(0, 1);
diff --git a/Utilities/cond.h b/Utilities/cond.h
index dc716fab88c8..0938ca72266b 100644
--- a/Utilities/cond.h
+++ b/Utilities/cond.h
@@ -206,7 +206,7 @@ class shared_cond
 			m_slot = m_this->m_cvx32.atomic_op([](u64& cvx32)
 			{
 				// Combine used bits and invert to find least significant bit unused
-				const u32 slot = utils::cnttz32(~((cvx32 & 0xffffffff) | (cvx32 >> 32)), true);
+				const u32 slot = utils::cnttz64(~((cvx32 & 0xffffffff) | (cvx32 >> 32)), true);
 
 				// Set lock bits (does nothing if all slots are used)
 				const u64 bit = (1ull << slot) & 0xffffffff;
@@ -261,6 +261,10 @@ class shared_cond
 		return imp_wait(lock.m_slot, usec_timeout);
 	}
 
+	void wait_all() noexcept;
+
+	bool wait_all(shared_lock& lock) noexcept;
+
 	void notify_all() noexcept
 	{
 		if (LIKELY(!m_cvx32))
@@ -268,4 +272,6 @@ class shared_cond
 
 		imp_notify();
 	}
+
+	void notify_all(shared_lock& lock) noexcept;
 };
diff --git a/rpcs3/Emu/CPU/CPUThread.cpp b/rpcs3/Emu/CPU/CPUThread.cpp
index 150a197f785b..cb2ea0fb39c9 100644
--- a/rpcs3/Emu/CPU/CPUThread.cpp
+++ b/rpcs3/Emu/CPU/CPUThread.cpp
@@ -19,10 +19,13 @@ void fmt_class_string<cpu_flag>::format(std::string& out, u64 arg)
 		{
 		case cpu_flag::stop: return "STOP";
 		case cpu_flag::exit: return "EXIT";
+		case cpu_flag::wait: return "w";
+		case cpu_flag::pause: return "p";
 		case cpu_flag::suspend: return "s";
 		case cpu_flag::ret: return "ret";
 		case cpu_flag::signal: return "sig";
 		case cpu_flag::memory: return "mem";
+		case cpu_flag::jit_return: return "JIT";
 		case cpu_flag::dbg_global_pause: return "G-PAUSE";
 		case cpu_flag::dbg_global_stop: return "G-EXIT";
 		case cpu_flag::dbg_pause: return "PAUSE";
@@ -42,10 +45,37 @@ void fmt_class_string<bs_t<cpu_flag>>::format(std::string& out, u64 arg)
 
 thread_local cpu_thread* g_tls_current_cpu_thread = nullptr;
 
-void cpu_thread::operator()()
+// Pseudo-lock for coordination
+alignas(64) shared_cond g_cpu_array_lock;
+
+// Semaphore for global thread array (global counter)
+alignas(64) atomic_t<u32> g_cpu_array_sema;
+
+// Semaphore subdivision for each array slot (64 x N in total)
+atomic_t<u64> g_cpu_array_bits[6]{};
+
+// All registered threads
+atomic_t<cpu_thread*> g_cpu_array[sizeof(g_cpu_array_bits) * 8]{};
+
+template <typename F>
+void for_all_cpu(F&& func) noexcept
 {
-	state -= cpu_flag::exit;
+	for (u32 i = 0; i < ::size32(g_cpu_array_bits); i++)
+	{
+		for (u64 bits = g_cpu_array_bits[i]; bits; bits &= bits - 1)
+		{
+			const u64 index = i * 64 + utils::cnttz64(bits, true);
+
+			if (cpu_thread* cpu = g_cpu_array[index].load())
+			{
+				func(cpu);
+			}
+		}
+	}
+}
 
+void cpu_thread::operator()()
+{
 	g_tls_current_cpu_thread = this;
 
 	if (g_cfg.core.thread_scheduler_enabled)
@@ -58,6 +88,48 @@ void cpu_thread::operator()()
 		thread_ctrl::set_native_priority(-1);
 	}
 
+	// Register thread in g_cpu_array
+	if (!g_cpu_array_sema.try_inc(sizeof(g_cpu_array_bits) * 8))
+	{
+		LOG_FATAL(GENERAL, "Too many threads");
+		Emu.Pause();
+		return;
+	}
+
+	u64 array_slot = -1;
+
+	for (u32 i = 0;; i = (i + 1) % ::size32(g_cpu_array_bits))
+	{
+		if (LIKELY(~g_cpu_array_bits[i]))
+		{
+			const u64 found = g_cpu_array_bits[i].atomic_op([](u64& bits) -> u64
+			{
+				// Find empty array slot and set its bit
+				if (LIKELY(~bits))
+				{
+					const u64 bit = utils::cnttz64(~bits, true);
+					bits |= 1ull << bit;
+					return bit;
+				}
+
+				return 64;
+			});
+
+			if (LIKELY(found < 64))
+			{
+				// Fixup
+				array_slot = i * 64 + found;
+				break;
+			}
+		}
+	}
+
+	// Register and wait if necessary
+	verify("g_cpu_array[...] -> this" HERE), g_cpu_array[array_slot].exchange(this) == nullptr;
+
+	state += cpu_flag::wait;
+	g_cpu_array_lock.wait_all();
+
 	// Check thread status
 	while (!(state & (cpu_flag::exit + cpu_flag::dbg_global_stop)))
 	{
@@ -86,6 +158,13 @@ void cpu_thread::operator()()
 
 		thread_ctrl::wait();
 	}
+
+	// Unregister and wait if necessary
+	state += cpu_flag::wait;
+	verify("g_cpu_array[...] -> null" HERE), g_cpu_array[array_slot].exchange(nullptr) == this;
+	g_cpu_array_bits[array_slot / 64] &= ~(1ull << (array_slot % 64));
+	g_cpu_array_sema--;
+	g_cpu_array_lock.wait_all();
 }
 
 void cpu_thread::on_abort()
@@ -105,7 +184,7 @@ cpu_thread::cpu_thread(u32 id)
 	g_threads_created++;
 }
 
-bool cpu_thread::check_state()
+bool cpu_thread::check_state() noexcept
 {
 #ifdef WITH_GDB_DEBUGGER
 	if (state & cpu_flag::dbg_pause)
@@ -116,6 +195,17 @@
 
 	bool cpu_sleep_called = false;
 	bool cpu_flag_memory = false;
+	bool cpu_flag_wait = false;
+
+	if (state & cpu_flag::wait)
+	{
+		cpu_flag_wait = true;
+	}
+	else if (state & cpu_flag::pause)
+	{
+		state += cpu_flag::wait;
+		cpu_flag_wait = true;
+	}
 
 	while (true)
 	{
@@ -131,8 +221,9 @@
 			state -= cpu_flag::memory;
 		}
 
-		if (state & cpu_flag::exit + cpu_flag::jit_return + cpu_flag::dbg_global_stop)
+		if (state & (cpu_flag::exit + cpu_flag::jit_return + cpu_flag::dbg_global_stop))
 		{
+			state += cpu_flag::wait;
 			return true;
 		}
 
@@ -141,7 +232,20 @@
 			cpu_sleep_called = false;
 		}
 
-		if (!is_paused())
+		const auto [state0, escape] = state.fetch_op([&](bs_t<cpu_flag>& flags)
+		{
+			// Check pause flags which hold thread inside check_state
+			if (flags & (cpu_flag::pause + cpu_flag::suspend + cpu_flag::dbg_global_pause + cpu_flag::dbg_pause))
+			{
+				return false;
+			}
+
+			// Atomically clean wait flag and escape
+			flags -= cpu_flag::wait;
+			return true;
+		});
+
+		if (escape)
 		{
 			if (cpu_flag_memory)
 			{
@@ -150,14 +254,46 @@
 
 			break;
 		}
-		else if (!cpu_sleep_called && state & cpu_flag::suspend)
+		else if (!cpu_sleep_called && state0 & cpu_flag::suspend)
 		{
 			cpu_sleep();
 			cpu_sleep_called = true;
 			continue;
 		}
 
-		thread_ctrl::wait();
+		if (!cpu_flag_wait)
+		{
+			state += cpu_flag::wait;
+			cpu_flag_wait = true;
+
+			// Spin wait once for a bit before resorting to thread_ctrl::wait
+			for (u32 i = 0; i < 10; i++)
+			{
+				if (state0 & (cpu_flag::pause + cpu_flag::suspend))
+				{
+					busy_wait(500);
+				}
+				else
+				{
+					break;
+				}
+			}
+
+			if (!(state0 & (cpu_flag::pause + cpu_flag::suspend)))
+			{
+				continue;
+			}
+		}
+
+		if (state0 & (cpu_flag::suspend + cpu_flag::dbg_global_pause + cpu_flag::dbg_pause))
+		{
+			thread_ctrl::wait();
+		}
+		else
+		{
+			// If only cpu_flag::pause was set, notification won't arrive
+			g_cpu_array_lock.wait_all();
+		}
 	}
 
 	const auto state_ = state.load();
@@ -196,3 +332,67 @@ std::string cpu_thread::dump() const
 {
 	return fmt::format("Type: %s\n" "State: %s\n", typeid(*this).name(), state.load());
 }
+
+cpu_thread::suspend_all::suspend_all(cpu_thread* _this) noexcept
+	: m_lock(g_cpu_array_lock.try_shared_lock())
+	, m_this(_this)
+{
+	// TODO
+	if (!m_lock)
+	{
+		LOG_FATAL(GENERAL, "g_cpu_array_lock: too many concurrent accesses");
+		Emu.Pause();
+		return;
+	}
+
+	if (m_this)
+	{
+		m_this->state += cpu_flag::wait;
+	}
+
+	for_all_cpu([](cpu_thread* cpu)
+	{
+		cpu->state += cpu_flag::pause;
+	});
+
+	busy_wait(500);
+
+	while (true)
+	{
+		bool ok = true;
+
+		for_all_cpu([&](cpu_thread* cpu)
+		{
+			if (!(cpu->state & cpu_flag::wait))
+			{
+				ok = false;
+			}
+		});
+
+		if (LIKELY(ok))
+		{
+			break;
+		}
+
+		busy_wait(1000);
+	}
+}
+
+cpu_thread::suspend_all::~suspend_all()
+{
+	// Make sure latest worker does cleanup and notifies others
+	if (!g_cpu_array_lock.wait_all(m_lock))
+	{
+		for_all_cpu([](cpu_thread* cpu)
+		{
+			cpu->state -= cpu_flag::pause;
+		});
+
+		g_cpu_array_lock.notify_all(m_lock);
+	}
+
+	if (m_this)
+	{
+		m_this->check_state();
+	}
+}
diff --git a/rpcs3/Emu/CPU/CPUThread.h b/rpcs3/Emu/CPU/CPUThread.h
index 7eb3fdf63373..a1f3af46e9d1 100644
--- a/rpcs3/Emu/CPU/CPUThread.h
+++ b/rpcs3/Emu/CPU/CPUThread.h
@@ -2,12 +2,15 @@
 
 #include "../Utilities/Thread.h"
 #include "../Utilities/bit_set.h"
+#include "../Utilities/cond.h"
 
 // Thread state flags
 enum class cpu_flag : u32
 {
 	stop, // Thread not running (HLE, initial state)
 	exit, // Irreversible exit
+	wait, // Indicates waiting state, set by the thread itself
+	pause, // Thread suspended by suspend_all technique
 	suspend, // Thread suspended
 	ret, // Callback return requested
 	signal, // Thread received a signal (HLE)
@@ -39,15 +42,15 @@
 	const u32 id;
 
 	// Public thread state
-	atomic_bs_t<cpu_flag> state{+cpu_flag::stop};
+	atomic_bs_t<cpu_flag> state{cpu_flag::stop + cpu_flag::wait};
 
 	// Process thread state, return true if the checker must return
-	bool check_state();
+	bool check_state() noexcept;
 
 	// Process thread state (pause)
 	[[nodiscard]] bool test_stopped()
 	{
-		if (UNLIKELY(state))
+		if (state)
 		{
 			if (check_state())
 			{
@@ -99,6 +102,20 @@
 
 	// Callback for vm::temporary_unlock
 	virtual void cpu_unmem() {}
+
+	// Thread locker
+	class suspend_all
+	{
+		decltype(std::declval<shared_cond&>().try_shared_lock()) m_lock;
+
+		cpu_thread* m_this;
+
+	public:
+		suspend_all(cpu_thread* _this) noexcept;
+		suspend_all(const suspend_all&) = delete;
+		suspend_all& operator=(const suspend_all&) = delete;
+		~suspend_all();
+	};
 };
 
 inline cpu_thread* get_current_cpu_thread() noexcept
diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index 2fbc73150117..6fc7b839a537 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -343,7 +343,6 @@ const auto spu_putllc_tx = build_function_asm([](asmjit::X86Assembler& c, auto& args)
+const auto spu_getll_tx = build_function_asm([](asmjit::X86Assembler& c, auto& args)
 {
 	using namespace asmjit;
 
@@ -558,7 +551,6 @@ const auto spu_getll_tx = build_function_asm
 		if (count >= 10)
 		{
-			LOG_ERROR(SPU, "%s took too long: %u", args.cmd, count);
+			LOG_WARNING(SPU, "%s took too long: %u", args.cmd, count);
+		}
+
+		if (count >= 16)
+		{
+			cpu_thread::suspend_all cpu_lock(this);
+
+			while (atomic_storage<u64>::bts(vm::reservation_acquire(addr, 128).raw(), 6))
+			{
+				busy_wait(100);
+			}
+
+			mov_rdata(vm::_ref(addr).data(), to_write.data());
+			vm::reservation_acquire(addr, 128) += 63;
 		}
 	}
 	else
@@ -1847,6 +1855,8 @@ bool spu_thread::process_mfc_cmd()
 	// Stall infinitely if MFC queue is full
 	while (UNLIKELY(mfc_size >= 16))
 	{
+		state += cpu_flag::wait;
+
 		if (is_stopped())
 		{
 			return false;
 		}
@@ -1875,6 +1885,8 @@ bool spu_thread::process_mfc_cmd()
 
 			while (rdata == data && (vm::reservation_acquire(addr, 128)) == rtime)
 			{
+				state += cpu_flag::wait;
+
 				if (is_stopped())
 				{
 					break;
 				}
@@ -1882,17 +1894,39 @@ bool spu_thread::process_mfc_cmd()
 
 				thread_ctrl::wait_for(100);
 			}
+
+			if (test_stopped())
+			{
+				return false;
+			}
 		}
 
-		if (LIKELY(g_use_rtm))
+		if (LIKELY(g_use_rtm && !g_cfg.core.spu_accurate_getllar))
 		{
-			const u64 count = g_cfg.core.spu_accurate_getllar ? spu_getll_tx(addr, dst.data(), &ntime) : spu_getll_fast(addr, dst.data(), &ntime);
+			const u64 count = spu_getll_fast(addr, dst.data(), &ntime);
 
 			if (count >= 10)
 			{
 				LOG_ERROR(SPU, "%s took too long: %u", ch_mfc_cmd.cmd, count);
 			}
 		}
+		else if (g_use_rtm)
+		{
+			ntime = spu_getll_tx(addr, dst.data());
+
+			if (ntime == 1)
+			{
+				cpu_thread::suspend_all cpu_lock(this);
+
+				while (vm::reservation_acquire(addr, 128) & 127)
+				{
+					busy_wait(100);
+				}
+
+				ntime = vm::reservation_acquire(addr, 128);
+				mov_rdata(dst.data(), data.data());
+			}
+		}
 		else
 		{
 			auto& res = vm::reservation_lock(addr, 128);
@@ -1968,7 +2002,34 @@ bool spu_thread::process_mfc_cmd()
 
 			if (count >= 10)
 			{
-				LOG_ERROR(SPU, "%s took too long: %u (r=%u)", ch_mfc_cmd.cmd, count, result);
+				LOG_WARNING(SPU, "%s took too long: %u (r=%u)", ch_mfc_cmd.cmd, count, result);
+			}
+
+			if (count >= 16 && !result)
+			{
+				cpu_thread::suspend_all cpu_lock(this);
+
+				while (atomic_storage<u64>::bts(vm::reservation_acquire(addr, 128).raw(), 6))
+				{
+					busy_wait(100);
+				}
+
+				auto& data = vm::_ref<decltype(rdata)>(addr);
+
+				if ((vm::reservation_acquire(addr, 128) & -128) == rtime && rdata == data)
+				{
+					mov_rdata(data.data(), to_write.data());
+					vm::reservation_acquire(addr, 128) += 63;
+					result = 1;
+				}
+				else
+				{
+					vm::reservation_acquire(addr, 128) -= 65;
+				}
+			}
+			else if (count && !result)
+			{
+				vm::reservation_acquire(addr, 128) -= 1;
 			}
 		}
 		else if (auto& data = vm::_ref<decltype(rdata)>(addr); rdata == data)
@@ -2256,6 +2317,11 @@ s64 spu_thread::get_ch_value(u32 ch)
 
 	auto read_channel = [&](spu_channel& channel) -> s64
 	{
+		if (channel.get_count() == 0)
+		{
+			state += cpu_flag::wait;
+		}
+
 		for (int i = 0; i < 10 && channel.get_count() == 0; i++)
 		{
 			busy_wait();
@@ -2273,6 +2339,7 @@ s64 spu_thread::get_ch_value(u32 ch)
 			thread_ctrl::wait();
 		}
 
+		check_state();
 		return out;
 	};
 
@@ -2284,6 +2351,11 @@ s64 spu_thread::get_ch_value(u32 ch)
 	}
 
 	case SPU_RdInMbox:
 	{
+		if (ch_in_mbox.get_count() == 0)
+		{
+			state += cpu_flag::wait;
+		}
+
 		while (true)
 		{
 			for (int i = 0; i < 10 && ch_in_mbox.get_count() == 0; i++)
@@ -2300,6 +2372,7 @@ s64 spu_thread::get_ch_value(u32 ch)
 				int_ctrl[2].set(SPU_INT2_STAT_SPU_MAILBOX_THRESHOLD_INT);
 			}
 
+			check_state();
 			return out;
 		}
 
@@ -2410,6 +2483,8 @@ s64 spu_thread::get_ch_value(u32 ch)
 
 			while (res = get_events(), !res)
 			{
+				state += cpu_flag::wait;
+
 				if (is_stopped())
 				{
 					return -1;
 				}
@@ -2418,11 +2493,14 @@ s64 spu_thread::get_ch_value(u32 ch)
 
 				pseudo_lock.wait(100);
 			}
 
+			check_state();
 			return res;
 		}
 
 		while (res = get_events(true), !res)
 		{
+			state += cpu_flag::wait;
+
 			if (is_stopped())
 			{
 				return -1;
 			}
@@ -2431,6 +2509,7 @@ s64 spu_thread::get_ch_value(u32 ch)
 
 			thread_ctrl::wait_for(100);
 		}
 
+		check_state();
 		return res;
 	}
 
@@ -2463,6 +2542,8 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)
 		{
 			while (!ch_out_intr_mbox.try_push(value))
 			{
+				state += cpu_flag::wait;
+
 				if (is_stopped())
 				{
 					return false;
 				}
@@ -2472,6 +2553,7 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)
 			}
 
 			int_ctrl[2].set(SPU_INT2_STAT_MAILBOX_INT);
+			check_state();
 			return true;
 		}
 
@@ -2609,6 +2691,8 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)
 		{
 			while (!ch_out_mbox.try_push(value))
 			{
+				state += cpu_flag::wait;
+
 				if (is_stopped())
 				{
 					return false;
 				}
@@ -2617,6 +2701,7 @@ bool spu_thread::set_ch_value(u32 ch, u32 value)
 
 				thread_ctrl::wait();
 			}
 
+			check_state();
 			return true;
 		}
 
@@ -2808,6 +2893,8 @@ bool spu_thread::stop_and_signal(u32 code)
 		// HACK: wait for executable code
 		while (!_ref<u32>(pc))
 		{
+			state += cpu_flag::wait;
+
 			if (is_stopped())
 			{
 				return false;
 			}
@@ -2816,12 +2903,15 @@ bool spu_thread::stop_and_signal(u32 code)
 
 			thread_ctrl::wait_for(1000);
 		}
 
+		check_state();
 		return false;
 	}
 
 	case 0x001:
 	{
+		state += cpu_flag::wait;
 		thread_ctrl::wait_for(1000); // hack
+		check_state();
 		return true;
 	}
 
@@ -2857,6 +2947,8 @@ bool spu_thread::stop_and_signal(u32 code)
 
 		std::shared_ptr<lv2_event_queue> queue;
 
+		state += cpu_flag::wait;
+
 		while (true)
 		{
 			queue.reset();
@@ -2897,6 +2989,7 @@ bool spu_thread::stop_and_signal(u32 code)
 
 			if (!queue)
 			{
+				check_state();
 				return ch_in_mbox.set_values(1, CELL_EINVAL), true; // TODO: check error value
 			}
 
@@ -2927,6 +3020,7 @@ bool spu_thread::stop_and_signal(u32 code)
 				const auto data3 = static_cast<u32>(std::get<3>(event));
 				ch_in_mbox.set_values(4, CELL_OK, data1, data2, data3);
 				queue->events.pop_front();
+				check_state();
 				return true;
 			}
 		}
@@ -2972,6 +3066,7 @@ bool spu_thread::stop_and_signal(u32 code)
 			}
 		}
 
+		check_state();
 		return true;
 	}
 
diff --git a/rpcs3/Emu/Cell/lv2/sys_net.cpp b/rpcs3/Emu/Cell/lv2/sys_net.cpp
index 8d08b6aa1023..ea3d87d302e2 100644
--- a/rpcs3/Emu/Cell/lv2/sys_net.cpp
+++ b/rpcs3/Emu/Cell/lv2/sys_net.cpp
@@ -357,6 +357,11 @@ s32 sys_net_bnet_accept(ppu_thread& ppu, s32 s, vm::ptr<sys_net_sockaddr> addr, vm::ptr<u32> paddrlen)
 		}
 	}
 
+	if (ppu.is_stopped())
+	{
+		return 0;
+	}
+
 	auto newsock = std::make_shared<lv2_socket>(native_socket);
 
 	result = idm::import_existing<lv2_socket>(newsock);
@@ -975,6 +980,11 @@ s32 sys_net_bnet_recvfrom(ppu_thread& ppu, s32 s, vm::ptr<void> buf, u32 len, s32 flags, vm::ptr<sys_net_sockaddr> addr, vm::ptr<u32> paddrlen)
 		}
 	}
 
+	if (ppu.is_stopped())
+	{
+		return 0;
+	}
+
 	// TODO
 	if (addr)
 	{
@@ -1796,6 +1806,11 @@ s32 sys_net_bnet_select(ppu_thread& ppu, s32 nfds, vm::ptr<sys_net_fd_set> readfds, vm::ptr<sys_net_fd_set> writefds, vm::ptr<sys_net_fd_set> exceptfds, vm::ptr<sys_net_timeval> timeout)
 		}
 	}
 
+	if (ppu.is_stopped())
+	{
+		return 0;
+	}
+
 	if (readfds)
 		*readfds = rread;
 
 	if (writefds)
diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp
index 0e4fed623e01..1a6dd0a6239c 100644
--- a/rpcs3/Emu/Memory/vm.cpp
+++ b/rpcs3/Emu/Memory/vm.cpp
@@ -172,6 +172,8 @@ namespace vm
 
 	void temporary_unlock(cpu_thread& cpu) noexcept
 	{
+		cpu.state += cpu_flag::wait;
+
 		if (g_tls_locked && g_tls_locked->compare_and_swap_test(&cpu, nullptr))
 		{
 			cpu.cpu_unmem();
diff --git a/rpcs3/Emu/System.h b/rpcs3/Emu/System.h
index 8ae7c33f05cb..1bc23dfebd67 100644
--- a/rpcs3/Emu/System.h
+++ b/rpcs3/Emu/System.h
@@ -379,7 +379,6 @@ struct cfg_root : cfg::node
 		cfg::_enum<spu_block_size_type> spu_block_size{this, "SPU Block Size", spu_block_size_type::safe};
 		cfg::_bool spu_accurate_getllar{this, "Accurate GETLLAR", false};
 		cfg::_bool spu_accurate_putlluc{this, "Accurate PUTLLUC", false};
-		cfg::_bool spu_accurate_putllc{this, "Accurate PUTLLC", false};
 		cfg::_bool spu_verification{this, "SPU Verification", true}; // Should be enabled
 		cfg::_bool spu_cache{this, "SPU Cache", true};
 		cfg::_enum<tsx_usage> enable_TSX{this, "Enable TSX", tsx_usage::enabled}; // Enable TSX. Forcing this on Haswell/Broadwell CPUs should be used carefully