Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REPLACE PARTITION ... FROM ... waits for the merges in unaffected partitions #45328

Open
filimonov opened this issue Jan 16, 2023 · 5 comments
Open

Comments

@filimonov
Copy link
Contributor

cat <<EOF | clickhouse-client -mn
DROP TABLE IF EXISTS a sync;
DROP TABLE IF EXISTS b sync;
create table a (id UInt64) engine=MergeTree ORDER BY (id) as select * FROM numbers(100);
create table b (id UInt64) engine=MergeTree ORDER BY (id) as select * FROM numbers(200);
ALTER TABLE a ADD COLUMN slower_merge UInt8 MATERIALIZED sleepEachRow(1);
ALTER TABLE b ADD COLUMN slower_merge UInt8 MATERIALIZED sleepEachRow(1);
EOF

-- run in background
clickhouse-client --query="OPTIMIZE TABLE a FINAL" &

-- waits for merge to be finished (should kill the background merge, no reason to wait for it to be finished)
clickhouse-client --query="alter table a replace partition id 'all' from b"
@tavplubix
Copy link
Member

waits for merge to be finished (should kill the background merge, no reason to wait for it to be finished)

It does kill background merges. But a merge (just like any operation in CLickHouse) cannot be killed at an arbitrary point, only between the processing of blocks.

@tavplubix tavplubix added question Question? and removed unexpected behaviour labels Jan 17, 2023
@filimonov filimonov changed the title REPLACE PARTITION ... FROM ... waits for the merges REPLACE PARTITION ... FROM ... waits for the merges in unaffected partitions Feb 3, 2023
@filimonov filimonov reopened this Feb 3, 2023
@filimonov
Copy link
Contributor Author

filimonov commented Feb 3, 2023

It actually does 'stop-the-world' thing, so it stop ALL the merges (also in unaffected partitions), and that can take a lot of time.

Test:

DROP TABLE IF EXISTS xyz;

create table xyz (p UInt8, i UInt64) Engine=MergeTree PARTITION BY p ORDER BY tuple();

SYSTEM STOP MERGES xyz;

-- let's create a lot of partition with a lot of parts which should be merged (mimic 'debt' after high inserts pressure):

-- select format('SET min_insert_block_size_rows = {1}; INSERT INTO xyz SELECT {0}, * FROM numbers({1}*100);', toString(number), toString(number*8192)) from numbers(50) FORMAT TSVRaw;

-- I've create partitions with parts of different size (so it can pick smaller and bigger)
SET min_insert_block_size_rows = 65536; INSERT INTO xyz SELECT 8, * FROM numbers(65536*100);
SET min_insert_block_size_rows = 73728; INSERT INTO xyz SELECT 9, * FROM numbers(73728*100);
SET min_insert_block_size_rows = 81920; INSERT INTO xyz SELECT 10, * FROM numbers(81920*100);
SET min_insert_block_size_rows = 90112; INSERT INTO xyz SELECT 11, * FROM numbers(90112*100);
SET min_insert_block_size_rows = 98304; INSERT INTO xyz SELECT 12, * FROM numbers(98304*100);
SET min_insert_block_size_rows = 106496; INSERT INTO xyz SELECT 13, * FROM numbers(106496*100);
SET min_insert_block_size_rows = 114688; INSERT INTO xyz SELECT 14, * FROM numbers(114688*100);
SET min_insert_block_size_rows = 122880; INSERT INTO xyz SELECT 15, * FROM numbers(122880*100);
SET min_insert_block_size_rows = 131072; INSERT INTO xyz SELECT 16, * FROM numbers(131072*100);
SET min_insert_block_size_rows = 139264; INSERT INTO xyz SELECT 17, * FROM numbers(139264*100);
SET min_insert_block_size_rows = 147456; INSERT INTO xyz SELECT 18, * FROM numbers(147456*100);
SET min_insert_block_size_rows = 155648; INSERT INTO xyz SELECT 19, * FROM numbers(155648*100);
SET min_insert_block_size_rows = 163840; INSERT INTO xyz SELECT 20, * FROM numbers(163840*100);
SET min_insert_block_size_rows = 172032; INSERT INTO xyz SELECT 21, * FROM numbers(172032*100);
SET min_insert_block_size_rows = 180224; INSERT INTO xyz SELECT 22, * FROM numbers(180224*100);
SET min_insert_block_size_rows = 188416; INSERT INTO xyz SELECT 23, * FROM numbers(188416*100);
SET min_insert_block_size_rows = 196608; INSERT INTO xyz SELECT 24, * FROM numbers(196608*100);
SET min_insert_block_size_rows = 204800; INSERT INTO xyz SELECT 25, * FROM numbers(204800*100);
SET min_insert_block_size_rows = 212992; INSERT INTO xyz SELECT 26, * FROM numbers(212992*100);
SET min_insert_block_size_rows = 221184; INSERT INTO xyz SELECT 27, * FROM numbers(221184*100);
SET min_insert_block_size_rows = 229376; INSERT INTO xyz SELECT 28, * FROM numbers(229376*100);
SET min_insert_block_size_rows = 237568; INSERT INTO xyz SELECT 29, * FROM numbers(237568*100);
SET min_insert_block_size_rows = 245760; INSERT INTO xyz SELECT 30, * FROM numbers(245760*100);
SET min_insert_block_size_rows = 253952; INSERT INTO xyz SELECT 31, * FROM numbers(253952*100);
SET min_insert_block_size_rows = 262144; INSERT INTO xyz SELECT 32, * FROM numbers(262144*100);
SET min_insert_block_size_rows = 270336; INSERT INTO xyz SELECT 33, * FROM numbers(270336*100);
SET min_insert_block_size_rows = 278528; INSERT INTO xyz SELECT 34, * FROM numbers(278528*100);
SET min_insert_block_size_rows = 286720; INSERT INTO xyz SELECT 35, * FROM numbers(286720*100);
SET min_insert_block_size_rows = 294912; INSERT INTO xyz SELECT 36, * FROM numbers(294912*100);
SET min_insert_block_size_rows = 303104; INSERT INTO xyz SELECT 37, * FROM numbers(303104*100);
SET min_insert_block_size_rows = 311296; INSERT INTO xyz SELECT 38, * FROM numbers(311296*100);
SET min_insert_block_size_rows = 319488; INSERT INTO xyz SELECT 39, * FROM numbers(319488*100);
SET min_insert_block_size_rows = 327680; INSERT INTO xyz SELECT 40, * FROM numbers(327680*100);
SET min_insert_block_size_rows = 335872; INSERT INTO xyz SELECT 41, * FROM numbers(335872*100);
SET min_insert_block_size_rows = 344064; INSERT INTO xyz SELECT 42, * FROM numbers(344064*100);
SET min_insert_block_size_rows = 352256; INSERT INTO xyz SELECT 43, * FROM numbers(352256*100);
SET min_insert_block_size_rows = 360448; INSERT INTO xyz SELECT 44, * FROM numbers(360448*100);
SET min_insert_block_size_rows = 368640; INSERT INTO xyz SELECT 45, * FROM numbers(368640*100);
SET min_insert_block_size_rows = 376832; INSERT INTO xyz SELECT 46, * FROM numbers(376832*100);
SET min_insert_block_size_rows = 385024; INSERT INTO xyz SELECT 47, * FROM numbers(385024*100);
SET min_insert_block_size_rows = 393216; INSERT INTO xyz SELECT 48, * FROM numbers(393216*100);

-- that should give a merge speed 1 / 0.00002 = 50k rows per second
-- with max block size 64Kb it is max ~1 sec of waiting (till the end of block)
ALTER TABLE xyz ADD COLUMN make_merge_slower UInt8 DEFAULT sleepEachRow(0.00002);

-- prepare the table to replace partition from
DROP TABLE IF EXISTS foo;
CREATE TABLE foo AS xyz;
INSERT INTO foo select 48, 100000 + number, 0 FROM numbers(1000000);

SYSTEM START MERGES xyz;

-- now do some more inserts in all partition to trigger background merges
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);
INSERT INTO xyz(p,i) SELECT number + 8, 1 FROM numbers(41);

select partition_id, rows_read, rows_written, elapsed, progress FROM system.merges WHERE table ='xyz' ORDER BY toUInt32(partition_id);

-- check how long will it take:
alter table xyz replace partition id '48' from foo;

-- in meanwhile you can check in different session: no merges for partition 48! 
select partition_id, rows_read, rows_written, elapsed, progress FROM system.merges WHERE table ='xyz' ORDER BY toUInt32(partition_id);

-- replace partition actually stops merges for ALL the partitions in the table(!)
-- so after the slowest merge is finished, it will go the replace partition and then start merging again

-- stack trace of the stuck replace partition:

-- pthread_cond_timedwait
-- DB::StorageMergeTree::stopMergesAndWait()
-- DB::StorageMergeTree::replacePartitionFrom(std::__1::shared_ptr<DB::IStorage> const&, std::__1::shared_ptr<DB::IAST> const&, bool, std::__1::shared_ptr<DB::Context const>)
-- DB::MergeTreeData::alterPartition(std::__1::shared_ptr<DB::StorageInMemoryMetadata const> const&, std::__1::vector<DB::PartitionCommand, std::__1::allocator<DB::PartitionCommand>> const&, std::__1::shared_ptr<DB::Context const>)
-- DB::InterpreterAlterQuery::executeToTable(DB::ASTAlterQuery const&)
-- DB::InterpreterAlterQuery::execute()
-- DB::executeQueryImpl(char const*, char const*, std::__1::shared_ptr<DB::Context>, bool, DB::QueryProcessingStage::Enum, DB::ReadBuffer*)
-- DB::executeQuery(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&, std::__1::shared_ptr<DB::Context>, bool, DB::QueryProcessingStage::Enum)
-- DB::TCPHandler::runImpl()
-- DB::TCPHandler::run()
-- Poco::Net::TCPServerConnection::start()
-- Poco::Net::TCPServerDispatcher::run()
-- Poco::PooledThread::run()
-- Poco::ThreadImpl::runnableEntry(void*)

@filimonov filimonov added unexpected behaviour and removed question Question? labels Feb 3, 2023
@filimonov
Copy link
Contributor Author

Generally it seem the task is to fix that TODO:

/// TODO allow to stop merges in specific partition only (like it's done in ReplicatedMergeTree)
std::unique_lock lock(currently_processing_in_background_mutex);

@aadant
Copy link

aadant commented Mar 15, 2023

@filimonov any progress on this ?

@aadant
Copy link

aadant commented Mar 18, 2023

Same thing with DROP PARTITION, MOVE PARTITION, etc ... those should not wait for merges (even moreover merges that were not even started)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

3 participants