Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix recalculation of skip indexes in ALTER UPDATE queries when table has adaptive granularity #55202

Merged
merged 6 commits into from Oct 9, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 15 additions & 4 deletions src/Interpreters/MutationsInterpreter.cpp
Expand Up @@ -40,6 +40,7 @@
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
#include <Parsers/makeASTForLogicalFunction.h>
#include <Common/logger_useful.h>
#include <Storages/MergeTree/MergeTreeDataPartType.h>

namespace DB
{
Expand Down Expand Up @@ -304,6 +305,11 @@ bool MutationsInterpreter::Source::hasProjection(const String & name) const
return part && part->hasProjection(name);
}

/// Tells whether this source wraps a data part stored in the Compact format.
/// A source that has no part attached is never reported as compact.
bool MutationsInterpreter::Source::isCompactPart() const
{
    if (!part)
        return false;

    const bool is_compact = (part->getType() == MergeTreeDataPartType::Compact);
    return is_compact;
}

static Names getAvailableColumnsWithVirtuals(StorageMetadataPtr metadata_snapshot, const IStorage & storage)
{
auto all_columns = metadata_snapshot->getColumns().getNamesOfPhysical();
Expand Down Expand Up @@ -562,7 +568,8 @@ void MutationsInterpreter::prepare(bool dry_run)
if (settings.recalculate_dependencies_of_updated_columns)
dependencies = getAllColumnDependencies(metadata_snapshot, updated_columns, has_dependency);

bool has_alter_delete = false;
bool need_rebuild_indexes = false;
bool need_rebuild_projections = false;
std::vector<String> read_columns;

/// First, break a sequence of commands into stages.
Expand All @@ -583,7 +590,8 @@ void MutationsInterpreter::prepare(bool dry_run)
predicate = makeASTFunction("isZeroOrNull", predicate);

stages.back().filters.push_back(predicate);
has_alter_delete = true;
need_rebuild_indexes = true;
need_rebuild_projections = true;
}
else if (command.type == MutationCommand::UPDATE)
{
Expand Down Expand Up @@ -687,6 +695,9 @@ void MutationsInterpreter::prepare(bool dry_run)
}
}
}

if (source.isCompactPart() && source.getMergeTreeData() && source.getMergeTreeData()->getSettings()->index_granularity_bytes > 0)
need_rebuild_indexes = true;
CurtizJ marked this conversation as resolved.
Show resolved Hide resolved
}
else if (command.type == MutationCommand::MATERIALIZE_COLUMN)
{
Expand Down Expand Up @@ -892,7 +903,7 @@ void MutationsInterpreter::prepare(bool dry_run)
if (!source.hasSecondaryIndex(index.name))
continue;

if (has_alter_delete)
if (need_rebuild_indexes)
{
materialized_indices.insert(index.name);
continue;
Expand All @@ -913,7 +924,7 @@ void MutationsInterpreter::prepare(bool dry_run)
if (!source.hasProjection(projection.name))
continue;

if (has_alter_delete)
if (need_rebuild_projections)
{
materialized_projections.insert(projection.name);
continue;
Expand Down
1 change: 1 addition & 0 deletions src/Interpreters/MutationsInterpreter.h
Expand Up @@ -122,6 +122,7 @@ class MutationsInterpreter
bool materializeTTLRecalculateOnly() const;
bool hasSecondaryIndex(const String & name) const;
bool hasProjection(const String & name) const;
bool isCompactPart() const;

void read(
Stage & first_stage,
Expand Down
11 changes: 6 additions & 5 deletions src/Storages/MergeTree/MergeTreeMarksLoader.cpp
Expand Up @@ -107,21 +107,22 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksImpl()
// We first read the marks into a temporary simple array, then compress them into a more compact
// representation.
PODArray<MarkInCompressedFile> plain_marks(marks_count * columns_in_mark); // temporary
auto full_mark_path = std::string(fs::path(data_part_storage->getFullPath()) / mrk_path);

if (file_size == 0 && marks_count != 0)
{
throw Exception(
ErrorCodes::CORRUPTED_DATA,
"Empty marks file '{}': {}, must be: {}",
std::string(fs::path(data_part_storage->getFullPath()) / mrk_path),
full_mark_path,
file_size, expected_uncompressed_size);
}

if (!index_granularity_info.mark_type.compressed && expected_uncompressed_size != file_size)
throw Exception(
ErrorCodes::CORRUPTED_DATA,
"Bad size of marks file '{}': {}, must be: {}",
std::string(fs::path(data_part_storage->getFullPath()) / mrk_path),
full_mark_path,
file_size,
expected_uncompressed_size);

Expand All @@ -142,7 +143,7 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksImpl()
throw Exception(
ErrorCodes::CANNOT_READ_ALL_DATA,
"Cannot read all marks from file {}, is eof: {}, buffer size: {}, file size: {}",
mrk_path,
full_mark_path,
reader->eof(),
reader->buffer().size(),
file_size);
Expand All @@ -155,7 +156,7 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksImpl()
throw Exception(
ErrorCodes::CANNOT_READ_ALL_DATA,
"Cannot read all marks from file {}, marks expected {} (bytes size {}), marks read {} (bytes size {})",
mrk_path, marks_count, expected_uncompressed_size, i, reader->count());
full_mark_path, marks_count, expected_uncompressed_size, i, reader->count());

size_t granularity;
reader->readStrict(
Expand All @@ -167,7 +168,7 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksImpl()
throw Exception(
ErrorCodes::CANNOT_READ_ALL_DATA,
"Too many marks in file {}, marks expected {} (bytes size {})",
mrk_path, marks_count, expected_uncompressed_size);
full_mark_path, marks_count, expected_uncompressed_size);
}

#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
Expand Down
@@ -0,0 +1 @@
342 442 The Containers library is a generic collection of class templates and algorithms that allow programmers to easily implement common data structures like queues, lists and stacks
@@ -0,0 +1,24 @@
-- Regression test: ALTER ... UPDATE on a table that has a skip index and
-- adaptive granularity enabled (index_granularity_bytes > 0).
-- `value` carries a minmax skip index; `s` is the column that gets rewritten.
CREATE TABLE kv
(
`key` UInt64,
`value` UInt64,
`s` String,
INDEX value_idx value TYPE minmax GRANULARITY 1
)
ENGINE = ReplacingMergeTree
ORDER BY key
SETTINGS index_granularity = 32, index_granularity_bytes = 1024;

-- 2048 rows with key = number, value = number + 100 and a short string in `s`.
-- NOTE(review): presumably this small insert is stored as a compact part,
-- which is the case the fix targets; confirm against the part-type thresholds.
INSERT INTO kv SELECT
number,
number + 100,
toString(number)
FROM numbers(2048);

-- Rewrite `s` in every row (WHERE 1) with a much longer string. The indexed
-- column `value` is not updated.
-- NOTE(review): mutations_sync = 2 is expected to make the ALTER wait until the
-- mutation has finished, so the SELECT below reads mutated data; confirm.
ALTER TABLE kv
UPDATE s = 'The Containers library is a generic collection of class templates and algorithms that allow programmers to easily implement common data structures like queues, lists and stacks' WHERE 1
SETTINGS mutations_sync = 2;

-- Filter on the indexed column. Exactly one row matches: key 342
-- (342 + 100 = 442), with the updated string in `s`.
SELECT *
FROM kv
WHERE value = 442;