Skip to content

Commit ab3d793

Browse files
authored
[libc++] Optimize ranges::move{,_backward} for vector<bool>::iterator (llvm#121109)
As a follow-up to llvm#121013 (which optimized `ranges::copy`) and llvm#121026 (which optimized `ranges::copy_backward`), this PR enhances the performance of `std::ranges::{move, move_backward}` for `vector<bool>::iterator`, addressing a subtask outlined in issue llvm#64038. The optimizations bring performance improvements analogous to those achieved for the `{copy, copy_backward}` algorithms: up to 2000x for aligned moves and 60x for unaligned moves. Moreover, comprehensive tests covering up to 4 storage words (256 bits) with odd and even bit sizes are provided, which validate the proposed optimizations in this patch.
1 parent 70e693c commit ab3d793

File tree

10 files changed

+414
-112
lines changed

10 files changed

+414
-112
lines changed

libcxx/docs/ReleaseNotes/21.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ Implemented Papers
4343
Improvements and New Features
4444
-----------------------------
4545

46-
- The ``std::ranges::{copy, copy_n, copy_backward}`` algorithms have been optimized for ``std::vector<bool>::iterator``\s,
47-
resulting in a performance improvement of up to 2000x.
46+
- The ``std::ranges::{copy, copy_n, copy_backward, move, move_backward}`` algorithms have been optimized for
47+
``std::vector<bool>::iterator``, resulting in a performance improvement of up to 2000x.
4848

4949
- Updated formatting library to Unicode 16.0.0.
5050

libcxx/include/__algorithm/move.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,13 @@
99
#ifndef _LIBCPP___ALGORITHM_MOVE_H
1010
#define _LIBCPP___ALGORITHM_MOVE_H
1111

12+
#include <__algorithm/copy.h>
1213
#include <__algorithm/copy_move_common.h>
1314
#include <__algorithm/for_each_segment.h>
1415
#include <__algorithm/iterator_operations.h>
1516
#include <__algorithm/min.h>
1617
#include <__config>
18+
#include <__fwd/bit_reference.h>
1719
#include <__iterator/iterator_traits.h>
1820
#include <__iterator/segmented_iterator.h>
1921
#include <__type_traits/common_type.h>
@@ -98,6 +100,14 @@ struct __move_impl {
98100
}
99101
}
100102

103+
template <class _Cp, bool _IsConst>
104+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cp, _IsConst>, __bit_iterator<_Cp, false> >
105+
operator()(__bit_iterator<_Cp, _IsConst> __first,
106+
__bit_iterator<_Cp, _IsConst> __last,
107+
__bit_iterator<_Cp, false> __result) {
108+
return std::__copy(__first, __last, __result);
109+
}
110+
101111
// At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer.
102112
template <class _In, class _Out, __enable_if_t<__can_lower_move_assignment_to_memmove<_In, _Out>::value, int> = 0>
103113
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*>

libcxx/include/__algorithm/move_backward.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,12 @@
99
#ifndef _LIBCPP___ALGORITHM_MOVE_BACKWARD_H
1010
#define _LIBCPP___ALGORITHM_MOVE_BACKWARD_H
1111

12+
#include <__algorithm/copy_backward.h>
1213
#include <__algorithm/copy_move_common.h>
1314
#include <__algorithm/iterator_operations.h>
1415
#include <__algorithm/min.h>
1516
#include <__config>
17+
#include <__fwd/bit_reference.h>
1618
#include <__iterator/iterator_traits.h>
1719
#include <__iterator/segmented_iterator.h>
1820
#include <__type_traits/common_type.h>
@@ -107,6 +109,14 @@ struct __move_backward_impl {
107109
}
108110
}
109111

112+
template <class _Cp, bool _IsConst>
113+
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__bit_iterator<_Cp, _IsConst>, __bit_iterator<_Cp, false> >
114+
operator()(__bit_iterator<_Cp, _IsConst> __first,
115+
__bit_iterator<_Cp, _IsConst> __last,
116+
__bit_iterator<_Cp, false> __result) {
117+
return std::__copy_backward<_ClassicAlgPolicy>(__first, __last, __result);
118+
}
119+
110120
// At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer.
111121
template <class _In, class _Out, __enable_if_t<__can_lower_move_assignment_to_memmove<_In, _Out>::value, int> = 0>
112122
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*>

libcxx/include/__bit_reference

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -210,22 +210,6 @@ private:
210210
__mask_(__m) {}
211211
};
212212

213-
// move
214-
215-
template <class _Cp, bool _IsConst>
216-
inline _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false>
217-
move(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) {
218-
return std::copy(__first, __last, __result);
219-
}
220-
221-
// move_backward
222-
223-
template <class _Cp, bool _IsConst>
224-
inline _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, false> move_backward(
225-
__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) {
226-
return std::copy_backward(__first, __last, __result);
227-
}
228-
229213
// swap_ranges
230214

231215
template <class _Cl, class _Cr>
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
10+
11+
#include <algorithm>
12+
#include <benchmark/benchmark.h>
13+
#include <ranges>
14+
#include <vector>
15+
16+
template <bool aligned>
17+
void bm_ranges_move_vb(benchmark::State& state) {
18+
auto n = state.range();
19+
std::vector<bool> v1(n, true);
20+
std::vector<bool> v2(n, false);
21+
benchmark::DoNotOptimize(v1);
22+
benchmark::DoNotOptimize(v2);
23+
std::vector<bool>* in = &v1;
24+
std::vector<bool>* out = &v2;
25+
for (auto _ : state) {
26+
if constexpr (aligned) {
27+
benchmark::DoNotOptimize(std::ranges::move(*in, std::ranges::begin(*out)));
28+
} else {
29+
benchmark::DoNotOptimize(
30+
std::ranges::move(std::views::counted(in->begin() + 4, n - 4), std::ranges::begin(*out)));
31+
}
32+
std::swap(in, out);
33+
benchmark::DoNotOptimize(in);
34+
benchmark::DoNotOptimize(out);
35+
}
36+
}
37+
38+
template <bool aligned>
39+
void bm_move_vb(benchmark::State& state) {
40+
auto n = state.range();
41+
std::vector<bool> v1(n, true);
42+
std::vector<bool> v2(n, false);
43+
benchmark::DoNotOptimize(v1);
44+
benchmark::DoNotOptimize(v2);
45+
std::vector<bool>* in = &v1;
46+
std::vector<bool>* out = &v2;
47+
for (auto _ : state) {
48+
auto first1 = in->begin();
49+
auto last1 = in->end();
50+
auto first2 = out->begin();
51+
if constexpr (aligned) {
52+
benchmark::DoNotOptimize(std::move(first1, last1, first2));
53+
} else {
54+
benchmark::DoNotOptimize(std::move(first1 + 4, last1, first2));
55+
}
56+
std::swap(in, out);
57+
benchmark::DoNotOptimize(in);
58+
benchmark::DoNotOptimize(out);
59+
}
60+
}
61+
62+
BENCHMARK(bm_ranges_move_vb<true>)
63+
->Name("bm_ranges_move_vb_aligned")
64+
->Range(8, 1 << 16)
65+
->DenseRange(102400, 204800, 4096);
66+
BENCHMARK(bm_ranges_move_vb<false>)->Name("bm_ranges_move_vb_unaligned")->Range(8, 1 << 20);
67+
68+
BENCHMARK(bm_move_vb<true>)->Name("bm_move_vb_aligned")->Range(8, 1 << 20);
69+
BENCHMARK(bm_move_vb<false>)->Name("bm_move_vb_unaligned")->Range(8, 1 << 20);
70+
71+
BENCHMARK_MAIN();
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
10+
11+
#include <algorithm>
12+
#include <benchmark/benchmark.h>
13+
#include <ranges>
14+
#include <vector>
15+
16+
template <bool aligned>
17+
void bm_ranges_move_backward_vb(benchmark::State& state) {
18+
auto n = state.range();
19+
std::vector<bool> v1(n, true);
20+
std::vector<bool> v2(n, false);
21+
benchmark::DoNotOptimize(v1);
22+
benchmark::DoNotOptimize(v2);
23+
std::vector<bool>* in = &v1;
24+
std::vector<bool>* out = &v2;
25+
for (auto _ : state) {
26+
if constexpr (aligned) {
27+
benchmark::DoNotOptimize(std::ranges::move_backward(*in, std::ranges::end(*out)));
28+
} else {
29+
benchmark::DoNotOptimize(
30+
std::ranges::move_backward(std::views::counted(in->begin(), n - 4), std::ranges::end(*out)));
31+
}
32+
std::swap(in, out);
33+
benchmark::DoNotOptimize(in);
34+
benchmark::DoNotOptimize(out);
35+
}
36+
}
37+
38+
template <bool aligned>
39+
void bm_move_backward_vb(benchmark::State& state) {
40+
auto n = state.range();
41+
std::vector<bool> v1(n, true);
42+
std::vector<bool> v2(n, false);
43+
benchmark::DoNotOptimize(v1);
44+
benchmark::DoNotOptimize(v2);
45+
std::vector<bool>* in = &v1;
46+
std::vector<bool>* out = &v2;
47+
for (auto _ : state) {
48+
auto first1 = in->begin();
49+
auto last1 = in->end();
50+
auto last2 = out->end();
51+
if constexpr (aligned) {
52+
benchmark::DoNotOptimize(std::move_backward(first1, last1, last2));
53+
} else {
54+
benchmark::DoNotOptimize(std::move_backward(first1, last1 - 4, last2));
55+
}
56+
std::swap(in, out);
57+
benchmark::DoNotOptimize(in);
58+
benchmark::DoNotOptimize(out);
59+
}
60+
}
61+
62+
BENCHMARK(bm_ranges_move_backward_vb<true>)
63+
->Name("bm_ranges_move_backward_vb_aligned")
64+
->Range(8, 1 << 16)
65+
->DenseRange(102400, 204800, 4096);
66+
BENCHMARK(bm_ranges_move_backward_vb<false>)->Name("bm_ranges_move_backward_vb_unaligned")->Range(8, 1 << 20);
67+
68+
BENCHMARK(bm_move_backward_vb<true>)->Name("bm_move_backward_vb_aligned")->Range(8, 1 << 20);
69+
BENCHMARK(bm_move_backward_vb<false>)->Name("bm_move_backward_vb_unaligned")->Range(8, 1 << 20);
70+
71+
BENCHMARK_MAIN();

libcxx/test/std/algorithms/alg.modifying.operations/alg.move/move.pass.cpp

Lines changed: 44 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <cassert>
2121
#include <iterator>
2222
#include <memory>
23+
#include <vector>
2324

2425
#include "MoveOnly.h"
2526
#include "test_iterators.h"
@@ -45,15 +46,15 @@ struct Test {
4546
template <class OutIter>
4647
TEST_CONSTEXPR_CXX20 void operator()() {
4748
const unsigned N = 1000;
48-
int ia[N] = {};
49+
int ia[N] = {};
4950
for (unsigned i = 0; i < N; ++i)
50-
ia[i] = i;
51+
ia[i] = i;
5152
int ib[N] = {0};
5253

53-
OutIter r = std::move(InIter(ia), InIter(ia+N), OutIter(ib));
54-
assert(base(r) == ib+N);
54+
OutIter r = std::move(InIter(ia), InIter(ia + N), OutIter(ib));
55+
assert(base(r) == ib + N);
5556
for (unsigned i = 0; i < N; ++i)
56-
assert(ia[i] == ib[i]);
57+
assert(ia[i] == ib[i]);
5758
}
5859
};
5960

@@ -73,13 +74,13 @@ struct Test1 {
7374
const unsigned N = 100;
7475
std::unique_ptr<int> ia[N];
7576
for (unsigned i = 0; i < N; ++i)
76-
ia[i].reset(new int(i));
77+
ia[i].reset(new int(i));
7778
std::unique_ptr<int> ib[N];
7879

79-
OutIter r = std::move(InIter(ia), InIter(ia+N), OutIter(ib));
80-
assert(base(r) == ib+N);
80+
OutIter r = std::move(InIter(ia), InIter(ia + N), OutIter(ib));
81+
assert(base(r) == ib + N);
8182
for (unsigned i = 0; i < N; ++i)
82-
assert(*ib[i] == static_cast<int>(i));
83+
assert(*ib[i] == static_cast<int>(i));
8384
}
8485
};
8586

@@ -92,6 +93,28 @@ struct Test1OutIters {
9293
}
9394
};
9495

96+
TEST_CONSTEXPR_CXX20 bool test_vector_bool(std::size_t N) {
97+
std::vector<bool> v(N, false);
98+
for (std::size_t i = 0; i < N; i += 2)
99+
v[i] = true;
100+
101+
{ // Test move with aligned bytes
102+
std::vector<bool> in(v);
103+
std::vector<bool> out(N);
104+
std::move(in.begin(), in.end(), out.begin());
105+
assert(out == v);
106+
}
107+
{ // Test move with unaligned bytes
108+
std::vector<bool> in(v);
109+
std::vector<bool> out(N);
110+
std::move(in.begin() + 4, in.end(), out.begin());
111+
for (std::size_t i = 0; i < N - 4; ++i)
112+
assert(v[i + 4] == out[i]);
113+
}
114+
115+
return true;
116+
}
117+
95118
TEST_CONSTEXPR_CXX20 bool test() {
96119
types::for_each(types::cpp17_input_iterator_list<int*>(), TestOutIters());
97120
if (TEST_STD_AT_LEAST_23_OR_RUNTIME_EVALUATED)
@@ -118,7 +141,7 @@ TEST_CONSTEXPR_CXX20 bool test() {
118141
// When non-trivial
119142
{
120143
MoveOnly from[3] = {1, 2, 3};
121-
MoveOnly to[3] = {};
144+
MoveOnly to[3] = {};
122145
std::move(std::begin(from), std::end(from), std::begin(to));
123146
assert(to[0] == MoveOnly(1));
124147
assert(to[1] == MoveOnly(2));
@@ -127,14 +150,24 @@ TEST_CONSTEXPR_CXX20 bool test() {
127150
// When trivial
128151
{
129152
TrivialMoveOnly from[3] = {1, 2, 3};
130-
TrivialMoveOnly to[3] = {};
153+
TrivialMoveOnly to[3] = {};
131154
std::move(std::begin(from), std::end(from), std::begin(to));
132155
assert(to[0] == TrivialMoveOnly(1));
133156
assert(to[1] == TrivialMoveOnly(2));
134157
assert(to[2] == TrivialMoveOnly(3));
135158
}
136159
}
137160

161+
{ // Test vector<bool>::iterator optimization
162+
assert(test_vector_bool(8));
163+
assert(test_vector_bool(19));
164+
assert(test_vector_bool(32));
165+
assert(test_vector_bool(49));
166+
assert(test_vector_bool(64));
167+
assert(test_vector_bool(199));
168+
assert(test_vector_bool(256));
169+
}
170+
138171
return true;
139172
}
140173

0 commit comments

Comments
 (0)