Skip to content

Commit

Permalink
Merge pull request #11638 from azat/skip-idx-bloom-filter-fix
Browse files Browse the repository at this point in the history
Fix bloom filters for String (data skipping indices)

(cherry picked from commit e460d7c)
  • Loading branch information
akuzm authored and Vitaly Baranov committed Jun 20, 2020
1 parent 8285677 commit 869dfce
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 7 deletions.
7 changes: 3 additions & 4 deletions src/Interpreters/BloomFilterHash.h
Expand Up @@ -196,18 +196,17 @@ struct BloomFilterHash
const ColumnString::Chars & data = index_column->getChars();
const ColumnString::Offsets & offsets = index_column->getOffsets();

ColumnString::Offset current_offset = pos;
for (size_t index = 0, size = vec.size(); index < size; ++index)
{
ColumnString::Offset current_offset = offsets[index + pos - 1];
size_t length = offsets[index + pos] - current_offset - 1 /* terminating zero */;
UInt64 city_hash = CityHash_v1_0_2::CityHash64(
reinterpret_cast<const char *>(&data[current_offset]), offsets[index + pos] - current_offset - 1);
reinterpret_cast<const char *>(&data[current_offset]), length);

if constexpr (is_first)
vec[index] = city_hash;
else
vec[index] = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(vec[index], city_hash));

current_offset = offsets[index + pos];
}
}
else if (const auto * fixed_string_index_column = typeid_cast<const ColumnFixedString *>(column))
Expand Down
6 changes: 3 additions & 3 deletions tests/queries/0_stateless/00945_bloom_filter_index.sql
Expand Up @@ -43,7 +43,7 @@ SELECT COUNT() FROM bloom_filter_types_test WHERE f32 = 1 SETTINGS max_rows_to_r
SELECT COUNT() FROM bloom_filter_types_test WHERE f64 = 1 SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM bloom_filter_types_test WHERE date = '1970-01-02' SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM bloom_filter_types_test WHERE date_time = toDateTime('1970-01-01 03:00:01', 'Europe/Moscow') SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM bloom_filter_types_test WHERE str = '1' SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM bloom_filter_types_test WHERE str = '1' SETTINGS max_rows_to_read = 12;
SELECT COUNT() FROM bloom_filter_types_test WHERE fixed_string = toFixedString('1', 5) SETTINGS max_rows_to_read = 12;

SELECT COUNT() FROM bloom_filter_types_test WHERE str IN ( SELECT str FROM bloom_filter_types_test);
Expand Down Expand Up @@ -122,7 +122,7 @@ SELECT COUNT() FROM bloom_filter_null_types_test WHERE f32 = 1 SETTINGS max_rows
SELECT COUNT() FROM bloom_filter_null_types_test WHERE f64 = 1 SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM bloom_filter_null_types_test WHERE date = '1970-01-02' SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM bloom_filter_null_types_test WHERE date_time = toDateTime('1970-01-01 03:00:01', 'Europe/Moscow') SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM bloom_filter_null_types_test WHERE str = '1' SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM bloom_filter_null_types_test WHERE str = '1' SETTINGS max_rows_to_read = 12;
SELECT COUNT() FROM bloom_filter_null_types_test WHERE fixed_string = toFixedString('1', 5) SETTINGS max_rows_to_read = 12;

SELECT COUNT() FROM bloom_filter_null_types_test WHERE isNull(i8);
Expand Down Expand Up @@ -150,7 +150,7 @@ CREATE TABLE bloom_filter_lc_null_types_test (order_key UInt64, str LowCardinali
INSERT INTO bloom_filter_lc_null_types_test SELECT number AS order_key, toString(number) AS str, toFixedString(toString(number), 5) AS fixed_string FROM system.numbers LIMIT 100;
INSERT INTO bloom_filter_lc_null_types_test SELECT 0 AS order_key, NULL AS str, NULL AS fixed_string;

SELECT COUNT() FROM bloom_filter_lc_null_types_test WHERE str = '1' SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM bloom_filter_lc_null_types_test WHERE str = '1' SETTINGS max_rows_to_read = 12;
SELECT COUNT() FROM bloom_filter_lc_null_types_test WHERE fixed_string = toFixedString('1', 5) SETTINGS max_rows_to_read = 12;

SELECT COUNT() FROM bloom_filter_lc_null_types_test WHERE isNull(str);
Expand Down
@@ -0,0 +1,4 @@
1
1
1
1
@@ -0,0 +1,8 @@
-- Test for a bloom_filter data skipping index declared on a String column.
-- Start from a clean state so the test can be re-run.
DROP TABLE IF EXISTS test_01307;
-- index_granularity = 2 keeps granules tiny, so the 4 rows inserted below do not fit in one granule;
-- the skip index `ind` is built over `val` with GRANULARITY 1.
CREATE TABLE test_01307 (id UInt64, val String, INDEX ind val TYPE bloom_filter() GRANULARITY 1) ENGINE = MergeTree() ORDER BY id SETTINGS index_granularity = 2;
-- Insert rows with id = number and val = toString(number) for the 4 values produced by numbers(4).
INSERT INTO test_01307 (id, val) select number as id, toString(number) as val from numbers(4);
-- NOTE(review): wrapping the column in identity() presumably keeps the skip index from being applied,
-- so this query acts as a baseline for the plain comparison that follows - confirm.
SELECT count() FROM test_01307 WHERE identity(val) = '2';
-- Same predicate written directly on the indexed column; it should return the same count as the query above.
SELECT count() FROM test_01307 WHERE val = '2';
-- Force a merge, then repeat both queries against the merged data.
OPTIMIZE TABLE test_01307 FINAL;
SELECT count() FROM test_01307 WHERE identity(val) = '2';
SELECT count() FROM test_01307 WHERE val = '2';

0 comments on commit 869dfce

Please sign in to comment.