-
Notifications
You must be signed in to change notification settings - Fork 6.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
REPLACE PARTITION ... FROM ... waits for the merges in unaffected partitions #45328
Comments
It does kill background merges. But a merge (just like any operation in CLickHouse) cannot be killed at an arbitrary point, only between the processing of blocks. |
It actually does 'stop-the-world' thing, so it stop ALL the merges (also in unaffected partitions), and that can take a lot of time. Test: DROP TABLE IF EXISTS xyz;
create table xyz (p UInt8, i UInt64) Engine=MergeTree PARTITION BY p ORDER BY tuple();
SYSTEM STOP MERGES xyz;
-- let's create a lot of partition with a lot of parts which should be merged (mimic 'debt' after high inserts pressure):
-- select format('SET min_insert_block_size_rows = {1}; INSERT INTO xyz SELECT {0}, * FROM numbers({1}*100);', toString(number), toString(number*8192)) from numbers(50) FORMAT TSVRaw;
-- I've create partitions with parts of different size (so it can pick smaller and bigger)
SET min_insert_block_size_rows = 65536; INSERT INTO xyz SELECT 8, * FROM numbers(65536*100);
SET min_insert_block_size_rows = 73728; INSERT INTO xyz SELECT 9, * FROM numbers(73728*100);
SET min_insert_block_size_rows = 81920; INSERT INTO xyz SELECT 10, * FROM numbers(81920*100);
SET min_insert_block_size_rows = 90112; INSERT INTO xyz SELECT 11, * FROM numbers(90112*100);
SET min_insert_block_size_rows = 98304; INSERT INTO xyz SELECT 12, * FROM numbers(98304*100);
SET min_insert_block_size_rows = 106496; INSERT INTO xyz SELECT 13, * FROM numbers(106496*100);
SET min_insert_block_size_rows = 114688; INSERT INTO xyz SELECT 14, * FROM numbers(114688*100);
SET min_insert_block_size_rows = 122880; INSERT INTO xyz SELECT 15, * FROM numbers(122880*100);
SET min_insert_block_size_rows = 131072; INSERT INTO xyz SELECT 16, * FROM numbers(131072*100);
SET min_insert_block_size_rows = 139264; INSERT INTO xyz SELECT 17, * FROM numbers(139264*100);
SET min_insert_block_size_rows = 147456; INSERT INTO xyz SELECT 18, * FROM numbers(147456*100);
SET min_insert_block_size_rows = 155648; INSERT INTO xyz SELECT 19, * FROM numbers(155648*100);
SET min_insert_block_size_rows = 163840; INSERT INTO xyz SELECT 20, * FROM numbers(163840*100);
SET min_insert_block_size_rows = 172032; INSERT INTO xyz SELECT 21, * FROM numbers(172032*100);
SET min_insert_block_size_rows = 180224; INSERT INTO xyz SELECT 22, * FROM numbers(180224*100);
SET min_insert_block_size_rows = 188416; INSERT INTO xyz SELECT 23, * FROM numbers(188416*100);
SET min_insert_block_size_rows = 196608; INSERT INTO xyz SELECT 24, * FROM numbers(196608*100);
SET min_insert_block_size_rows = 204800; INSERT INTO xyz SELECT 25, * FROM numbers(204800*100);
SET min_insert_block_size_rows = 212992; INSERT INTO xyz SELECT 26, * FROM numbers(212992*100);
SET min_insert_block_size_rows = 221184; INSERT INTO xyz SELECT 27, * FROM numbers(221184*100);
SET min_insert_block_size_rows = 229376; INSERT INTO xyz SELECT 28, * FROM numbers(229376*100);
SET min_insert_block_size_rows = 237568; INSERT INTO xyz SELECT 29, * FROM numbers(237568*100);
SET min_insert_block_size_rows = 245760; INSERT INTO xyz SELECT 30, * FROM numbers(245760*100);
SET min_insert_block_size_rows = 253952; INSERT INTO xyz SELECT 31, * FROM numbers(253952*100);
SET min_insert_block_size_rows = 262144; INSERT INTO xyz SELECT 32, * FROM numbers(262144*100);
SET min_insert_block_size_rows = 270336; INSERT INTO xyz SELECT 33, * FROM numbers(270336*100);
SET min_insert_block_size_rows = 278528; INSERT INTO xyz SELECT 34, * FROM numbers(278528*100);
SET min_insert_block_size_rows = 286720; INSERT INTO xyz SELECT 35, * FROM numbers(286720*100);
SET min_insert_block_size_rows = 294912; INSERT INTO xyz SELECT 36, * FROM numbers(294912*100);
SET min_insert_block_size_rows = 303104; INSERT INTO xyz SELECT 37, * FROM numbers(303104*100);
SET min_insert_block_size_rows = 311296; INSERT INTO xyz SELECT 38, * FROM numbers(311296*100);
SET min_insert_block_size_rows = 319488; INSERT INTO xyz SELECT 39, * FROM numbers(319488*100);
SET min_insert_block_size_rows = 327680; INSERT INTO xyz SELECT 40, * FROM numbers(327680*100);
SET min_insert_block_size_rows = 335872; INSERT INTO xyz SELECT 41, * FROM numbers(335872*100);
SET min_insert_block_size_rows = 344064; INSERT INTO xyz SELECT 42, * FROM numbers(344064*100);
SET min_insert_block_size_rows = 352256; INSERT INTO xyz SELECT 43, * FROM numbers(352256*100);
SET min_insert_block_size_rows = 360448; INSERT INTO xyz SELECT 44, * FROM numbers(360448*100);
SET min_insert_block_size_rows = 368640; INSERT INTO xyz SELECT 45, * FROM numbers(368640*100);
SET min_insert_block_size_rows = 376832; INSERT INTO xyz SELECT 46, * FROM numbers(376832*100);
SET min_insert_block_size_rows = 385024; INSERT INTO xyz SELECT 47, * FROM numbers(385024*100);
SET min_insert_block_size_rows = 393216; INSERT INTO xyz SELECT 48, * FROM numbers(393216*100);
-- that should give a merge speed 1 / 0.00002 = 50k rows per second
-- with max block size 64Kb it is max ~1 sec of waiting (till the end of block)
ALTER TABLE xyz ADD COLUMN make_merge_slower UInt8 DEFAULT sleepEachRow(0.00002);
-- prepare the table to replace partition from
DROP TABLE IF EXISTS foo;
CREATE TABLE foo AS xyz;
INSERT INTO foo select 48, 100000 + number, 0 FROM numbers(1000000);
SYSTEM START MERGES xyz;
-- now do some more inserts in all partition to trigger background merges
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
select partition_id, rows_read, rows_written, elapsed, progress FROM system.merges WHERE table ='xyz' ORDER BY toUInt32(partition_id);
-- check how long will it take:
alter table xyz replace partition id '48' from foo;
-- in meanwhile you can check in different session: no merges for partition 48!
select partition_id, rows_read, rows_written, elapsed, progress FROM system.merges WHERE table ='xyz' ORDER BY toUInt32(partition_id);
-- replace partition actually stops merges for ALL the partitions in the table(!)
-- so after the slowest merge is finished, it will go the replace partition and then start merging again
-- stack trace of the stuck replace partition:
-- pthread_cond_timedwait
-- DB::StorageMergeTree::stopMergesAndWait()
-- DB::StorageMergeTree::replacePartitionFrom(std::__1::shared_ptr<DB::IStorage> const&, std::__1::shared_ptr<DB::IAST> const&, bool, std::__1::shared_ptr<DB::Context const>)
-- DB::MergeTreeData::alterPartition(std::__1::shared_ptr<DB::StorageInMemoryMetadata const> const&, std::__1::vector<DB::PartitionCommand, std::__1::allocator<DB::PartitionCommand>> const&, std::__1::shared_ptr<DB::Context const>)
-- DB::InterpreterAlterQuery::executeToTable(DB::ASTAlterQuery const&)
-- DB::InterpreterAlterQuery::execute()
-- DB::executeQueryImpl(char const*, char const*, std::__1::shared_ptr<DB::Context>, bool, DB::QueryProcessingStage::Enum, DB::ReadBuffer*)
-- DB::executeQuery(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&, std::__1::shared_ptr<DB::Context>, bool, DB::QueryProcessingStage::Enum)
-- DB::TCPHandler::runImpl()
-- DB::TCPHandler::run()
-- Poco::Net::TCPServerConnection::start()
-- Poco::Net::TCPServerDispatcher::run()
-- Poco::PooledThread::run()
-- Poco::ThreadImpl::runnableEntry(void*) |
Generally it seem the task is to fix that TODO: ClickHouse/src/Storages/StorageMergeTree.cpp Lines 1441 to 1442 in 33877b5
|
@filimonov any progress on this ? |
Same thing with DROP PARTITION, MOVE PARTITION, etc ... those should not wait for merges (even moreover merges that were not even started) |
The text was updated successfully, but these errors were encountered: