Skip to content

Commit 3fae92e

Browse files
trflynn89 authored and
awesomekling committed
LibUnicode: Search code point properties sequentially at compile time
When generating code point property tables, we currently binary search the code point range lists for each property to decide if a code point has that property. However, we are iterating both over the code points and through the sorted properties in order. This means we do not need to search code point ranges that are below the current code point at all. We can even remove the code point ranges that fall below the current code point, as we will not see a code point in those ranges again. On my machine, this reduces the run time of GenerateUnicodeData from 3.4 seconds to 1.2 seconds.
1 parent 1f1d5ed commit 3fae92e

File tree

1 file changed

+20
-8
lines changed

1 file changed

+20
-8
lines changed

Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeData.cpp

Lines changed: 20 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,6 @@
77
#include "GeneratorUtil.h"
88
#include <AK/AllOf.h>
99
#include <AK/Array.h>
10-
#include <AK/BinarySearch.h>
1110
#include <AK/CharacterTypes.h>
1211
#include <AK/DeprecatedString.h>
1312
#include <AK/Error.h>
@@ -1540,7 +1539,7 @@ static ErrorOr<void> create_code_point_tables(UnicodeData& unicode_data)
15401539
static constexpr auto MAX_CODE_POINT = 0x10ffffu;
15411540

15421541
struct TableMetadata {
1543-
static ErrorOr<TableMetadata> create(PropList const& property_list)
1542+
static ErrorOr<TableMetadata> create(PropList& property_list)
15441543
{
15451544
TableMetadata data;
15461545
TRY(data.property_values.try_ensure_capacity(property_list.size()));
@@ -1549,15 +1548,15 @@ static ErrorOr<void> create_code_point_tables(UnicodeData& unicode_data)
15491548
auto property_names = property_list.keys();
15501549
quick_sort(property_names);
15511550

1552-
for (auto const& property_name : property_names) {
1553-
auto const& code_point_rages = property_list.get(property_name).value();
1554-
data.property_values.unchecked_append(code_point_rages);
1551+
for (auto& property_name : property_names) {
1552+
auto& code_point_ranges = property_list.get(property_name).value();
1553+
data.property_values.unchecked_append(move(code_point_ranges));
15551554
}
15561555

15571556
return data;
15581557
}
15591558

1560-
Vector<typename PropList::ValueType const&> property_values;
1559+
Vector<typename PropList::ValueType> property_values;
15611560
Vector<bool> property_set;
15621561

15631562
Vector<size_t> current_block;
@@ -1566,10 +1565,23 @@ static ErrorOr<void> create_code_point_tables(UnicodeData& unicode_data)
15661565

15671566
auto update_tables = [](auto code_point, auto& tables, auto& metadata) -> ErrorOr<void> {
15681567
static constexpr auto BLOCK_SIZE = CodePointTables::LSB_MASK + 1;
1568+
static Unicode::CodePointRangeComparator comparator {};
1569+
1570+
for (auto& property_values : metadata.property_values) {
1571+
size_t ranges_to_remove = 0;
1572+
auto has_property = false;
1573+
1574+
for (auto const& range : property_values) {
1575+
if (auto comparison = comparator(code_point, range); comparison <= 0) {
1576+
has_property = comparison == 0;
1577+
break;
1578+
}
1579+
1580+
++ranges_to_remove;
1581+
}
15691582

1570-
for (auto const& property_values : metadata.property_values) {
1571-
auto has_property = binary_search(property_values, code_point, nullptr, Unicode::CodePointRangeComparator {}) != nullptr;
15721583
metadata.property_set.unchecked_append(has_property);
1584+
property_values.remove(0, ranges_to_remove);
15731585
}
15741586

15751587
size_t unique_properties_index = 0;

0 commit comments

Comments
 (0)