-
Notifications
You must be signed in to change notification settings - Fork 13.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libc++] Improve test coverage for copy/move ctors for vector<bool> #120132
Conversation
@llvm/pr-subscribers-libcxx Author: Peng Liu (winner245) ChangesGeneral descriptionThis PR is part of a series aimed at significantly improving the performance of Current PRThis PR significantly improves the performance of various constructors in
Before:
After:
Full diff: https://github.com/llvm/llvm-project/pull/120132.diff 3 Files Affected:
diff --git a/libcxx/include/__vector/vector_bool.h b/libcxx/include/__vector/vector_bool.h
index 36eb7f350ac406..d7f9ba78a7af80 100644
--- a/libcxx/include/__vector/vector_bool.h
+++ b/libcxx/include/__vector/vector_bool.h
@@ -398,6 +398,8 @@ class _LIBCPP_TEMPLATE_VIS vector<bool, _Allocator> {
__guard.__complete();
}
+ _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __alloc_and_copy(const vector& __v);
+
template <class _Iterator, class _Sentinel>
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void __assign_with_sentinel(_Iterator __first, _Sentinel __last);
@@ -674,25 +676,30 @@ vector<bool, _Allocator>::vector(initializer_list<value_type> __il, const alloca
#endif // _LIBCPP_CXX03_LANG
+// This function copies each storage word as a whole, which is substantially more efficient than copying
+// individual bits within each word
+template <class _Allocator>
+_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void vector<bool, _Allocator>::__alloc_and_copy(const vector& __v) {
+ if (__v.__size_) {
+ __vallocate(__v.__size_);
+ std::copy(__v.__begin_, __v.__begin_ + __external_cap_to_internal(__v.__size_), __begin_);
+ }
+ __size_ = __v.__size_;
+}
+
template <class _Allocator>
_LIBCPP_CONSTEXPR_SINCE_CXX20 vector<bool, _Allocator>::vector(const vector& __v)
: __begin_(nullptr),
__size_(0),
__cap_(0),
__alloc_(__storage_traits::select_on_container_copy_construction(__v.__alloc_)) {
- if (__v.size() > 0) {
- __vallocate(__v.size());
- __construct_at_end(__v.begin(), __v.end(), __v.size());
- }
+ __alloc_and_copy(__v);
}
template <class _Allocator>
_LIBCPP_CONSTEXPR_SINCE_CXX20 vector<bool, _Allocator>::vector(const vector& __v, const allocator_type& __a)
: __begin_(nullptr), __size_(0), __cap_(0), __alloc_(__a) {
- if (__v.size() > 0) {
- __vallocate(__v.size());
- __construct_at_end(__v.begin(), __v.end(), __v.size());
- }
+ __alloc_and_copy(__v);
}
template <class _Allocator>
@@ -737,9 +744,8 @@ vector<bool, _Allocator>::vector(vector&& __v, const __type_identity_t<allocator
this->__cap_ = __v.__cap_;
__v.__begin_ = nullptr;
__v.__cap_ = __v.__size_ = 0;
- } else if (__v.size() > 0) {
- __vallocate(__v.size());
- __construct_at_end(__v.begin(), __v.end(), __v.size());
+ } else {
+ __alloc_and_copy(__v);
}
}
diff --git a/libcxx/test/benchmarks/containers/ContainerBenchmarks.h b/libcxx/test/benchmarks/containers/ContainerBenchmarks.h
index 6d21e12896ec9e..2e83fc68ef4d22 100644
--- a/libcxx/test/benchmarks/containers/ContainerBenchmarks.h
+++ b/libcxx/test/benchmarks/containers/ContainerBenchmarks.h
@@ -39,6 +39,36 @@ void BM_CopyConstruct(benchmark::State& st, Container) {
}
}
+template <class Container>
+void BM_MoveConstruct(benchmark::State& st, Container) {
+ auto size = st.range(0);
+ Container c(size);
+ for (auto _ : st) {
+ auto v = std::move(c);
+ DoNotOptimizeData(v);
+ }
+}
+
+template <class Container, class Allocator>
+void BM_CopyConstruct_Alloc(benchmark::State& st, Container, Allocator a) {
+ auto size = st.range(0);
+ Container c(size);
+ for (auto _ : st) {
+ Container v(c, a);
+ DoNotOptimizeData(v);
+ }
+}
+
+template <class Container, class Allocator>
+void BM_MoveConstruct_Alloc(benchmark::State& st, Container, Allocator a) {
+ auto size = st.range(0);
+ Container c(size);
+ for (auto _ : st) {
+ Container v(std::move(c), a);
+ DoNotOptimizeData(v);
+ }
+}
+
template <class Container>
void BM_Assignment(benchmark::State& st, Container) {
auto size = st.range(0);
diff --git a/libcxx/test/benchmarks/containers/vector_bool_operations.bench.cpp b/libcxx/test/benchmarks/containers/vector_bool_operations.bench.cpp
new file mode 100644
index 00000000000000..7de6b344d43943
--- /dev/null
+++ b/libcxx/test/benchmarks/containers/vector_bool_operations.bench.cpp
@@ -0,0 +1,36 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <deque>
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+#include "ContainerBenchmarks.h"
+#include "../GenerateInput.h"
+#include "test_allocator.h"
+
+using namespace ContainerBenchmarks;
+
+BENCHMARK_CAPTURE(BM_CopyConstruct, vector_bool, std::vector<bool>{})->Arg(5140480);
+BENCHMARK_CAPTURE(BM_MoveConstruct, vector_bool, std::vector<bool>{})->Arg(5140480);
+BENCHMARK_CAPTURE(
+ BM_CopyConstruct_Alloc, vector_bool, std::vector<bool, test_allocator<bool>>(), test_allocator<bool>(3))
+ ->Arg(5140480);
+BENCHMARK_CAPTURE(
+ BM_MoveConstruct_Alloc, vector_bool, std::vector<bool, test_allocator<bool>>(), test_allocator<bool>(3))
+ ->Arg(5140480);
+
+BENCHMARK_MAIN();
\ No newline at end of file
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should instead fix #64038.
Thank you for your feedback. If I understand correctly, you are suggesting that I work on the general |
Yes, basically. The overload is implemented there, but is currently not called from the copy constructor because that (indirectly) uses the internal |
We should prioritize fixing the bitwise algorithms in #64038. I have already submitted PRs to address this, but I believe the approach in this PR presents several advantages:
Given the above concerns, I believe that proceeding with this PR remains a valuable choice. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with comments.
I do think we should go for the general-purpose optimizations in std::copy
and friends for vector<bool>::iterator
, however I think this patch is simple and doesn't preclude that optimization from being made. Worst case, if we think this adds any maintenance burden in the long run (unlikely), we could go back and call std::copy(__v.begin(), __v.end(), begin())
instead in these call sites and remove the bit of logic related to __external_cap_to_internal
.
I'm personally not a huge fan of this, since it adds another code path you have to know about to understand what vector does, especially since this doesn't improve anything over calling |
@winner245 I disagree. This patch means more code without an obvious benefit. Since we will have the bitwise versions anyways, I don't think the points you make actually apply. Having essentially the same (or a better optimization) twice yields its own problems, since optimizations are often only applied in one case. I don't know whether that will actually be a problem, but there have been lots of places where I though there was no optimiation to gain and it turned out I was wrong. On the reliability side, given that the two cases you mention are either something that has been addressed by the standard itself (making it a questionable decision to exploit it for any implementation) and a compilation error which no one ran into in the last decade or so (AFAICT) I doubt there is much of a real problem. I agree these are in fact bugs, I just question how wide of an impact they actually have. (Note that I'm adding even more constant folding in #106225 to work around more constant folding problems) |
I suggest we then wait for the underlying regression fix to land so we can reevaluate this patch. At that point, this would become a pure NFC refactoring IIUC, and we can evaluate it on that basis. |
Yes, that should be the case. If it's not then this would probably get my approval as well. (Also, how the hell does github work? I wrote the second comment before the first one, but GH deleted that for me and I didn't feel like re-writing it, so I wrote a shorter message) |
Thanks @philnik777. I think your concern is valid, and I am fine with that. In that case, we will need to land #121013 first. Then this one becomes NFC or I might need to improve the tests for the copy/move constructors as the existing ones only considered up to 3 bytes, which is less than the word size of 8 bytes. |
ead9ab1
to
0c7e214
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with comments applied. I would also like @philnik777 to take another look.
Personally, I find the code a lot easier to read after this PR. We call std::copy
directly, which everyone knows well, and we set the size explicitly from the constructor. That removes the need for calling __construct_at_end
, which has a strange contract like that of only being called when the vector is empty.
libcxx/test/std/containers/sequences/vector.bool/copy_alloc.pass.cpp
Outdated
Show resolved
Hide resolved
d59fc21
to
64a198d
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with comments applied. I would also like @philnik777 to take another look.
Personally, I find the code a lot easier to read after this PR. We call
std::copy
directly, which everyone knows well, and we set the size explicitly from the constructor. That removes the need for calling__construct_at_end
, which has a strange contract like that of only being called when the vector is empty.
What strange contract are you talking about? AFAICT the only precondition is that there is enough space to construct the elements.
I don't really care whether we call __construct_at_end
or use std::copy
directly, but I still find std::copy(__v.begin(), __v.end(), begin())
much more readable than the current proposal. If the only reason for this PR is readability I don't see how the current proposal is any better than the status quo.
Before my patch #119632,
Please note that we are not comparing __construct_at_end(__v.begin(), __v.end(), __v.size()); with std::copy(__v.__begin_, __v.__begin_ + __external_cap_to_internal(__v.size()), __begin_);
__size_ = __v.size(); I have no idea what |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@philnik777 If this patch uses std::copy(__v.begin(), __v.end(), begin())
, we need to handle the leading bits in the last word like __construct_at_end
does:
if (end().__ctz_ != 0)
std::fill_n(end(), __bits_per_word - end().__ctz_, 0);
Then, the benefit of this patch basically goes away since we'd be better off just calling __construct_at_end
and not inlining anything.
That being said, since we are failing to gain consensus to move forward with this patch, I would suggest that we take the test improvements in this patch and land them, and not move forward with the refactoring part of the patch. After the recent improvements to std::vector<bool>
optimizations, the refactoring part of this patch doesn't provide as much value, and I think our time may be better spent on something else than trying to gain consensus on a minor stylistic refactoring.
bdccc14
to
84025be
Compare
I've dropped all commits related to algorithm changes and refactoring, and retained only the test enhancements. Accordingly, I've updated the PR description and revised the title to reflect these changes. |
84025be
to
dd0c05b
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks! Very minor nitpicky comments, please feel free to land after making those changes.
libcxx/test/std/containers/sequences/vector.bool/move_alloc.pass.cpp
Outdated
Show resolved
Hide resolved
dd0c05b
to
f18c026
Compare
The current tests for
vector<bool>
fail to adequately cover realistic scenarios, as they are limited to cases of up to 3 bytes, representing less than 1/2 of a word size on a 64-bit system. However, mostvector<bool>
operations rely on code paths triggered only when handling multiple storage words (8 bytes each). To address this gap, this PR rewrites the tests for copy and move constructors, as well as allocator-extended copy and move constructors, ensuring that previously untested code paths are now thoroughly validated.Note: This PR now focuses on improving test coverage, as the performance enhancements have been addressed in separate patches that optimize algorithms
ranges::{copy, move}
for__bit_iterator
s (e.g., #121013, #121109). For completeness, I have retained the history of this patch to provide full context.General descriptionThis PR is part of a series aimed at significantly improving the performance ofvector<bool>
. Each PR focuses on enhancing a specific subset of operations, ensuring they are self-contained and easy to review. The main idea for performance improvements involves using word-wise implementation along with bit manipulation techniques, rather than solely using bit-wise operations in the previous implementation, resulting in substantial performance gains.### Current PRThis PR significantly improves the performance of various constructors invector<bool>
by at least 500x under a variety of practical storage word sizes. The improvements for the above tests are:- Copy constructor: 576x- Copy constructor with extended allocator: 546x- Move constructor with extended allocator: 568x### TestsThe existing tests forvector<bool>
are largely insufficient to cover realistic scenarios. They are limited to cases of up to 1~2 bytes. However, mostvector<bool>
operations involve code paths that are executed only when multiple storage words, 8 bytes each, are involved. To fix the test coverage, this PR completely rewrites the constructor tests to ensure that previously untested code paths are now properly covered.Furthermore, this PR conducts benchmark testing for the move-assignment operator, demonstrating at least 500x performance improvement.#### Before:-------------------------------------------------------------------------------------Benchmark Time CPU Iterations-------------------------------------------------------------------------------------BM_CopyConstruct/vector_bool/5140480 8596063 ns 8563419 ns 78BM_MoveConstruct/vector_bool/5140480 0.474 ns 0.473 ns 1470604919BM_CopyConstruct_Alloc/vector_bool/5140480 8182327 ns 8169876 ns 83BM_MoveConstruct_Alloc/vector_bool/5140480 8544951 ns 8537676 ns 83#### After:-------------------------------------------------------------------------------------Benchmark Time CPU Iterations-------------------------------------------------------------------------------------BM_CopyConstruct/vector_bool/5140480 14899 ns 14943 ns 46010BM_MoveConstruct/vector_bool/5140480 0.451 ns 0.453 ns 1571837345BM_CopyConstruct_Alloc/vector_bool/5140480 14979 ns 15033 ns 47913BM_MoveConstruct_Alloc/vector_bool/5140480 15043 ns 15101 ns 45906