Skip to content

Commit

Permalink
Merge pull request #9048 from ClickHouse/fix_mutation_order
Browse files Browse the repository at this point in the history
Fix primary.idx corruption after delete mutation
  • Loading branch information
tavplubix committed Feb 9, 2020
1 parent 7981e73 commit fc151dc
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 8 deletions.
34 changes: 34 additions & 0 deletions dbms/src/Interpreters/MutationsInterpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/ASTOrderByElement.h>
#include <IO/WriteHelpers.h>


Expand Down Expand Up @@ -525,6 +526,39 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector<Stage> &
}
select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression));
}
auto metadata = storage->getInMemoryMetadata();
/// We have to execute select in order of primary key
/// because we don't sort results additionaly and don't have
/// any guarantees on data order without ORDER BY. It's almost free, because we
/// have optimization for data read in primary key order.
if (metadata.order_by_ast)
{
ASTPtr dummy;

ASTPtr key_expr;
if (metadata.primary_key_ast)
key_expr = metadata.primary_key_ast;
else
key_expr = metadata.order_by_ast;

bool empty = false;
/// In all other cases we cannot have empty key
if (auto key_function = key_expr->as<ASTFunction>())
empty = key_function->arguments->children.size() == 0;

/// Not explicitely spicified empty key
if (!empty)
{
auto order_by_expr = std::make_shared<ASTOrderByElement>(1, 1, false, dummy, false, dummy, dummy, dummy);


order_by_expr->children.push_back(key_expr);
auto res = std::make_shared<ASTExpressionList>();
res->children.push_back(order_by_expr);

select->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(res));
}
}

return select;
}
Expand Down
6 changes: 6 additions & 0 deletions dbms/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ class StorageFromMergeTreeDataPart : public ext::shared_ptr_helper<StorageFromMe
return part->storage.mayBenefitFromIndexForIn(left_in_operand, query_context);
}

StorageInMemoryMetadata getInMemoryMetadata() const override
{
return part->storage.getInMemoryMetadata();
}


protected:
StorageFromMergeTreeDataPart(const MergeTreeData::DataPartPtr & part_)
: IStorage(part_->storage.getVirtuals()), part(part_)
Expand Down
17 changes: 9 additions & 8 deletions dbms/src/Storages/StorageReplicatedMergeTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3207,6 +3207,7 @@ void StorageReplicatedMergeTree::alter(
/// metadata alter.
if (params.isSettingsAlter())
{
lockStructureExclusively(table_lock_holder, query_context.getCurrentQueryId());
/// We don't replicate storage_settings_ptr ALTER. It's local operation.
/// Also we don't upgrade alter lock to table structure lock.
LOG_DEBUG(log, "ALTER storage_settings_ptr only");
Expand Down Expand Up @@ -3256,9 +3257,7 @@ void StorageReplicatedMergeTree::alter(
std::vector<ChangedNode> changed_nodes;

{
/// Just to read current structure. Alter will be done in separate thread.
auto table_lock = lockStructureForShare(false, query_context.getCurrentQueryId());

/// We can safely read structure, because we guarded with alter_intention_lock
if (is_readonly)
throw Exception("Can't ALTER readonly table", ErrorCodes::TABLE_IS_READ_ONLY);

Expand Down Expand Up @@ -3289,11 +3288,13 @@ void StorageReplicatedMergeTree::alter(
changed_nodes.emplace_back(zookeeper_path, "metadata", new_metadata_str);

/// Perform settings update locally

auto old_metadata = getInMemoryMetadata();
old_metadata.settings_ast = metadata.settings_ast;
changeSettings(metadata.settings_ast, table_lock_holder);
global_context.getDatabase(current_database_name)->alterTable(query_context, current_table_name, old_metadata);
{
lockStructureExclusively(table_lock_holder, query_context.getCurrentQueryId());
auto old_metadata = getInMemoryMetadata();
old_metadata.settings_ast = metadata.settings_ast;
changeSettings(metadata.settings_ast, table_lock_holder);
global_context.getDatabase(current_database_name)->alterTable(query_context, current_table_name, old_metadata);
}

/// Modify shared metadata nodes in ZooKeeper.
Coordination::Requests ops;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
43200
0
43200
43200
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env bash

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. $CURDIR/../shell_config.sh


$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS movement"

$CLICKHOUSE_CLIENT -n --query "CREATE TABLE movement (date DateTime('Europe/Moscow')) Engine = MergeTree ORDER BY (toStartOfHour(date));"

$CLICKHOUSE_CLIENT --query "insert into movement select toDateTime('2020-01-22 00:00:00') + number%(23*3600) from numbers(1000000);"

$CLICKHOUSE_CLIENT --query "OPTIMIZE TABLE movement FINAL"

$CLICKHOUSE_CLIENT -n --query "
SELECT
count(),
toStartOfHour(date) AS Hour
FROM movement
WHERE (date >= toDateTime('2020-01-22T10:00:00')) AND (date <= toDateTime('2020-01-22T23:00:00'))
GROUP BY Hour
ORDER BY Hour DESC
" | grep "16:00:00" | cut -f1


$CLICKHOUSE_CLIENT --query "alter table movement delete where date >= toDateTime('2020-01-22T16:00:00') and date < toDateTime('2020-01-22T17:00:00') SETTINGS mutations_sync = 2"

$CLICKHOUSE_CLIENT -n --query "
SELECT
count(),
toStartOfHour(date) AS Hour
FROM movement
WHERE (date >= toDateTime('2020-01-22T10:00:00')) AND (date <= toDateTime('2020-01-22T23:00:00'))
GROUP BY Hour
ORDER BY Hour DESC
" | grep "16:00:00" | wc -l


$CLICKHOUSE_CLIENT -n --query "
SELECT
count(),
toStartOfHour(date) AS Hour
FROM movement
WHERE (date >= toDateTime('2020-01-22T10:00:00')) AND (date <= toDateTime('2020-01-22T23:00:00'))
GROUP BY Hour
ORDER BY Hour DESC
" | grep "22:00:00" | cut -f1


$CLICKHOUSE_CLIENT -n --query "
SELECT
count(),
toStartOfHour(date) AS Hour
FROM movement
GROUP BY Hour
ORDER BY Hour DESC
" | grep "22:00:00" | cut -f1


$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS movement"

0 comments on commit fc151dc

Please sign in to comment.