diff --git a/benchmark/lib/CMakeLists.txt b/benchmark/lib/CMakeLists.txt index 53d95423..46fd98c8 100644 --- a/benchmark/lib/CMakeLists.txt +++ b/benchmark/lib/CMakeLists.txt @@ -9,7 +9,6 @@ target_sources(benchmark_common FILE_SET HEADERS BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR} FILES - common.hpp fib.hpp uts.hpp macros.hpp diff --git a/benchmark/lib/common.hpp b/benchmark/lib/common.hpp deleted file mode 100644 index e981f9a1..00000000 --- a/benchmark/lib/common.hpp +++ /dev/null @@ -1,26 +0,0 @@ -#pragma once - -#include - -// Use `import std;` by default. Textually `#include ` drags in -// ``, which triggers a libc++ 22 link-time bug (undefined -// `__atomic_unique_lock::__set_locked_bit`) in TUs that later instantiate -// anything touching std::stop_*. Targets that can't use modules (e.g. the -// openmp benchmarks, see benchmark/src/openmp/CMakeLists.txt) define -// LF_BENCH_NO_IMPORT_STD and get textual includes instead. -#ifdef LF_BENCH_NO_IMPORT_STD - #include - #include -#else -import std; -#endif - -inline void bench_thread_args(benchmark::Benchmark *bench, auto make_args) { - unsigned hw = std::max(1U, std::thread::hardware_concurrency()); - for (unsigned t : {1U, 2U, 4U, 6U, 8U, 12U, 16U, 24U, 32U, 48U, 64U, 96U}) { - if (t > hw) { - return; - } - make_args(bench, t); - } -} diff --git a/benchmark/lib/macros.hpp b/benchmark/lib/macros.hpp index f1084bf9..a32642f4 100644 --- a/benchmark/lib/macros.hpp +++ b/benchmark/lib/macros.hpp @@ -2,12 +2,17 @@ #include -#include "common.hpp" - +// Use `import std;` by default. Textually `#include ` drags in +// ``, which triggers a libc++ 22 link-time bug (undefined +// `__atomic_unique_lock::__set_locked_bit`) in TUs that later instantiate +// anything touching std::stop_*. Targets that can't use modules (e.g. the +// openmp benchmarks, see benchmark/src/openmp/CMakeLists.txt) define +// LF_BENCH_NO_IMPORT_STD and get textual includes instead. #ifdef LF_BENCH_NO_IMPORT_STD #include #include #include + #include #else import std; #endif @@ -16,6 +21,16 @@ import std; namespace lf_bench { +inline void bench_thread_args(benchmark::Benchmark *bench, auto make_args) { + unsigned hw = std::max(1U, std::thread::hardware_concurrency()); + for (unsigned t : {1U, 2U, 4U, 6U, 8U, 12U, 16U, 24U, 32U, 48U, 64U, 96U}) { + if (t > hw) { + return; + } + make_args(bench, t); + } +} + inline auto sanitize(std::string s) -> std::string { s.erase(std::remove(s.begin(), s.end(), ' '), s.end()); return s; diff --git a/benchmark/src/baremetal/fib.cpp b/benchmark/src/baremetal/fib.cpp index 46ce3c80..24733659 100644 --- a/benchmark/src/baremetal/fib.cpp +++ b/benchmark/src/baremetal/fib.cpp @@ -1,6 +1,5 @@ #include -#include "common.hpp" #include "fib.hpp" #include "macros.hpp" diff --git a/benchmark/src/libfork/fib.cpp b/benchmark/src/libfork/fib.cpp index 67b1cb73..beaace9c 100644 --- a/benchmark/src/libfork/fib.cpp +++ b/benchmark/src/libfork/fib.cpp @@ -1,6 +1,5 @@ #include -#include "common.hpp" #include "fib.hpp" #include "helpers.hpp" diff --git a/benchmark/src/libfork/uts.cpp b/benchmark/src/libfork/uts.cpp index 38a8d2e4..91ef3f79 100644 --- a/benchmark/src/libfork/uts.cpp +++ b/benchmark/src/libfork/uts.cpp @@ -1,6 +1,5 @@ #include -#include "common.hpp" #include "uts.hpp" #include "helpers.hpp" diff --git a/benchmark/src/serial/fib.cpp b/benchmark/src/serial/fib.cpp index a094a6d8..2b65104e 100644 --- a/benchmark/src/serial/fib.cpp +++ b/benchmark/src/serial/fib.cpp @@ -1,6 +1,5 @@ #include -#include "common.hpp" #include "fib.hpp" #include "macros.hpp" diff --git a/benchmark/src/serial/uts.cpp b/benchmark/src/serial/uts.cpp index b9aa12d6..291934e7 100644 --- a/benchmark/src/serial/uts.cpp +++ b/benchmark/src/serial/uts.cpp @@ -1,6 +1,5 @@ #include -#include "common.hpp" #include "macros.hpp" #include "uts.hpp" diff --git a/src/core/execute.cxx b/src/core/execute.cxx index eb0974d3..5eb503b0 100644 --- a/src/core/execute.cxx +++ b/src/core/execute.cxx @@ -35,7 +35,7 @@ constexpr void execute(Context &context, sched_handle handle) { thread_local_context = std::addressof(context); - defer _ = [] noexcept -> void { + defer _ = [] static noexcept -> void { thread_local_context = nullptr; }; @@ -60,7 +60,7 @@ constexpr void execute(Context &context, steal_handle handle) { thread_local_context = std::addressof(context); - defer _ = [] noexcept -> void { + defer _ = [] static noexcept -> void { thread_local_context = nullptr; }; diff --git a/src/core/ops.cxx b/src/core/ops.cxx index 63f3b73e..d88a0972 100644 --- a/src/core/ops.cxx +++ b/src/core/ops.cxx @@ -16,6 +16,25 @@ namespace lf { struct no_stop_t {}; struct no_ret_t {}; +// =============== Value-or-reference storage policy =============== // + +// For rvalue-reference arguments that are trivially copyable and fit in two +// pointer-sized words, store by value inside pkg instead of keeping a reference. +// This lets [[no_unique_address]] collapse empty functors to zero bytes and +// allows the compiler to treat the stored values as local data (no aliasing). +template +concept small_trivially_copyable = !std::is_reference_v // + && std::is_trivially_copyable_v // + && sizeof(T) <= 2 * sizeof(void *) // + && alignof(T) <= alignof(std::max_align_t); // + +// Only collapses rvalue refs; lvalue refs are kept as-is to preserve reference semantics. +template +using store_as_t = + std::conditional_t && small_trivially_copyable>, + std::remove_cvref_t, + T>; + // clang-format off template @@ -29,20 +48,21 @@ struct [[nodiscard("You should immediately co_await this!")]] pkg { // clang-format on /** - * @brief Forward the function member of a pkg correctly + * @brief Forward the function member of a pkg correctly. * - * The Fn member should be an l/r value reference, r-value reference need an - * explicit move to be forwarded correctly. + * Handles three cases: + * - rvalue reference Fn: move it. + * - lvalue reference Fn: return by reference. + * - value type Fn (small trivially-copyable stored directly): return by value. */ template constexpr auto fwd_fn(auto &&fn) noexcept -> Fn { - - static_assert(std::is_reference_v); - if constexpr (std::is_rvalue_reference_v) { return std::move(fn); - } else { + } else if constexpr (std::is_lvalue_reference_v || small_trivially_copyable) { return fn; + } else { + static_assert(false, "Invalid Fn type in fwd_fn"); } } @@ -68,10 +88,10 @@ template struct scope_ops : scope_base { private: template - using call_pkg = pkg; + using call_pkg = pkg, store_as_t...>; template - using fork_pkg = pkg; + using fork_pkg = pkg, store_as_t...>; public: // Default constructible @@ -137,10 +157,10 @@ template struct child_scope_ops : scope_base, stop_source { private: template - using call_pkg = pkg; + using call_pkg = pkg, store_as_t...>; template - using fork_pkg = pkg; + using fork_pkg = pkg, store_as_t...>; public: /** diff --git a/src/core/promise.cxx b/src/core/promise.cxx index 62a8d8a1..0b0cc9a8 100644 --- a/src/core/promise.cxx +++ b/src/core/promise.cxx @@ -504,9 +504,6 @@ struct mixin_frame { await_transform_pkg(this auto const &self, pkg &&pkg) noexcept( async_nothrow_invocable) -> awaitable { - // Required for noexcept specifier to be correct - static_assert(std::is_reference_v && (... && std::is_reference_v)); - using U = async_result_t; // clang-format off diff --git a/src/core/thread_locals.cxx b/src/core/thread_locals.cxx index 05d3d6db..67a27837 100644 --- a/src/core/thread_locals.cxx +++ b/src/core/thread_locals.cxx @@ -12,7 +12,7 @@ namespace lf { template constinit inline thread_local Context *thread_local_context = nullptr; -// TODO: implictaions of thread local on constexpr +// TODO: implications of thread local on constexpr /** * @brief A getter for the current worker context, checks for null in debug. @@ -22,6 +22,9 @@ constexpr auto get_tls_context() noexcept -> Context & { return *not_null(thread_local_context); } +/** + * @brief A getter for the current worker context's stack, checks for null in debug. + */ template constexpr auto get_tls_stack() noexcept -> stack_t & { return get_tls_context().stack();