Skip to content

Commit 274f8ee

Browse files
trflynn89 authored and gmta committed
AK: Make hashing of UTF-16 strings cheaper
No need to iterate every byte of the string, we can iterate the code units instead. We must also actually record that we have cached the hash :^)
1 parent 73154de commit 274f8ee

File tree

5 files changed

+11
-13
lines changed

5 files changed

+11
-13
lines changed

AK/StringBase.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -219,7 +219,7 @@ inline u32 StringBase::hash() const
219219
return string_hash(reinterpret_cast<char const*>(bytes.data()), bytes.size());
220220
}
221221
if (!m_impl.data)
222-
return string_hash(nullptr, 0);
222+
return string_hash<char>(nullptr, 0);
223223
return data_without_union_member_assertion()->hash();
224224
}
225225

AK/StringHash.h

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66

77
#pragma once
88

9+
#include <AK/Concepts.h>
910
#include <AK/Types.h>
1011

1112
namespace AK {
@@ -14,7 +15,8 @@ namespace AK {
1415
// We can't use SipHash since that depends on runtime parameters,
1516
// but some string hashes like IPC endpoint magic numbers need to be deterministic.
1617
// Maybe use a SipHash with a statically-known key?
17-
constexpr u32 string_hash(char const* characters, size_t length, u32 seed = 0)
18+
template<OneOf<char, char16_t> T>
19+
constexpr u32 string_hash(T const* characters, size_t length, u32 seed = 0)
1820
{
1921
u32 hash = seed;
2022
for (size_t i = 0; i < length; ++i) {

AK/Utf16StringBase.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -176,7 +176,7 @@ class Utf16StringBase {
176176

177177
if (auto const* data = data_without_union_member_assertion())
178178
return data->hash();
179-
return string_hash(nullptr, 0);
179+
return string_hash<char16_t>(nullptr, 0);
180180
}
181181

182182
[[nodiscard]] ALWAYS_INLINE bool is_empty() const { return length_in_code_units() == 0uz; }

AK/Utf16StringData.h

Lines changed: 5 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -78,8 +78,11 @@ class Utf16StringData final : public RefCounted<Utf16StringData> {
7878

7979
ALWAYS_INLINE u32 hash() const
8080
{
81-
if (!m_has_hash)
82-
m_hash = calculate_hash();
81+
if (!m_has_hash) {
82+
m_hash = utf16_view().hash();
83+
m_has_hash = true;
84+
}
85+
8386
return m_hash;
8487
}
8588

@@ -128,13 +131,6 @@ class Utf16StringData final : public RefCounted<Utf16StringData> {
128131

129132
[[nodiscard]] size_t calculate_code_point_length() const;
130133

131-
[[nodiscard]] ALWAYS_INLINE u32 calculate_hash() const
132-
{
133-
if (has_ascii_storage())
134-
return ascii_view().hash();
135-
return utf16_view().hash();
136-
}
137-
138134
// We store whether this string has ASCII or UTF-16 storage by setting the most significant bit of m_length_in_code_units
139135
// to 1 for UTF-16 storage. This shrinks the size of most UTF-16 string related classes, at the cost of not being
140136
// allowed to create a string larger than 2**63 - 1.

AK/Utf16View.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -324,7 +324,7 @@ class Utf16View {
324324
return 0;
325325
if (has_ascii_storage())
326326
return string_hash(m_string.ascii, length_in_code_units());
327-
return string_hash(reinterpret_cast<char const*>(m_string.utf16), length_in_code_units() * sizeof(char16_t));
327+
return string_hash(m_string.utf16, length_in_code_units());
328328
}
329329

330330
[[nodiscard]] constexpr bool is_null() const

0 commit comments

Comments
 (0)