Skip to content

Commit

Permalink
Remove existing_count.txt
Browse files Browse the repository at this point in the history
  • Loading branch information
jewelzqiu committed Jan 18, 2024
1 parent a63c8cd commit 7d8243a
Show file tree
Hide file tree
Showing 9 changed files with 7 additions and 49 deletions.
1 change: 0 additions & 1 deletion src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -757,7 +757,6 @@ void DataPartStorageOnDiskBase::clearDirectory(
request.emplace_back(fs::path(dir) / "delete-on-destroy.txt", true);
request.emplace_back(fs::path(dir) / "txn_version.txt", true);
request.emplace_back(fs::path(dir) / "metadata_version.txt", true);
request.emplace_back(fs::path(dir) / "existing_count.txt", true);

disk->removeSharedFiles(request, !can_remove_shared_data, names_not_to_remove);
disk->removeDirectory(dir);
Expand Down
3 changes: 1 addition & 2 deletions src/Storages/MergeTree/DataPartsExchange.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -844,8 +844,7 @@ void Fetcher::downloadBaseOrProjectionPartToDisk(
if (file_name != "checksums.txt" &&
file_name != "columns.txt" &&
file_name != IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME &&
file_name != IMergeTreeDataPart::METADATA_VERSION_FILE_NAME &&
file_name != IMergeTreeDataPart::EXISTING_COUNT_FILE_NAME)
file_name != IMergeTreeDataPart::METADATA_VERSION_FILE_NAME)
checksums.addFile(file_name, file_size, expected_hash);
}

Expand Down
25 changes: 2 additions & 23 deletions src/Storages/MergeTree/IMergeTreeDataPart.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -869,9 +869,6 @@ NameSet IMergeTreeDataPart::getFileNamesWithoutChecksums() const
if (getDataPartStorage().exists(METADATA_VERSION_FILE_NAME))
result.emplace(METADATA_VERSION_FILE_NAME);

if (getDataPartStorage().exists(EXISTING_COUNT_FILE_NAME))
result.emplace(EXISTING_COUNT_FILE_NAME);

return result;
}

Expand Down Expand Up @@ -1302,29 +1299,11 @@ void IMergeTreeDataPart::loadRowsCount()

void IMergeTreeDataPart::loadExistingRowsCount()
{
if (!rows_count)
{
existing_rows_count = 0;
}
else if (!supportLightweightDeleteMutate() || !hasLightweightDelete())
{
if (!rows_count || !storage.getSettings()->load_existing_rows_count_for_old_parts || !supportLightweightDeleteMutate()
|| !hasLightweightDelete())
existing_rows_count = rows_count;
}
else if (getDataPartStorage().exists(EXISTING_COUNT_FILE_NAME))
{
auto buf = metadata_manager->read(EXISTING_COUNT_FILE_NAME);
readIntText(existing_rows_count, *buf);
assertEOF(*buf);
}
else
{
existing_rows_count = readExistingRowsCount();

auto out_existing_count = getDataPartStorage().writeFile(EXISTING_COUNT_FILE_NAME, 4096, storage.getContext()->getWriteSettings());
writeIntText(existing_rows_count, *out_existing_count);
out_existing_count->finalize();
out_existing_count->sync();
}
}

UInt64 IMergeTreeDataPart::readExistingRowsCount()
Expand Down
6 changes: 1 addition & 5 deletions src/Storages/MergeTree/IMergeTreeDataPart.h
Original file line number Diff line number Diff line change
Expand Up @@ -462,9 +462,6 @@ class IMergeTreeDataPart : public std::enable_shared_from_this<IMergeTreeDataPar

static inline constexpr auto METADATA_VERSION_FILE_NAME = "metadata_version.txt";

/// File that contains existing (excluding lightweight deleted) rows count of the part
static inline constexpr auto EXISTING_COUNT_FILE_NAME = "existing_count.txt";

/// One of part files which is used to check how many references (I'd like
/// to say hardlinks, but it will confuse even more) we have for the part
/// for zero copy replication. Sadly it's very complex.
Expand Down Expand Up @@ -672,8 +669,7 @@ class IMergeTreeDataPart : public std::enable_shared_from_this<IMergeTreeDataPar
/// For the older format version calculates rows count from the size of a column with a fixed size.
void loadRowsCount();

/// Load existing rows count for this part from disk if existing_count.txt exists.
/// Otherwise read from _row_exists column.
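/// Load existing rows count from the _row_exists column if load_existing_rows_count_for_old_parts is true
/// and the part has lightweight deletes. Otherwise existing_rows_count is set equal to rows_count.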
void loadExistingRowsCount();

static void appendFilesOfRowsCount(Strings & files);
Expand Down
1 change: 1 addition & 0 deletions src/Storages/MergeTree/MergeTreeData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8311,6 +8311,7 @@ std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> MergeTreeData::createE

new_data_part->setColumns(columns, {}, metadata_snapshot->getMetadataVersion());
new_data_part->rows_count = block.rows();
new_data_part->existing_rows_count = block.rows();

new_data_part->partition = partition;

Expand Down
1 change: 1 addition & 0 deletions src/Storages/MergeTree/MergeTreeDataWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl(

new_data_part->setColumns(columns, infos, metadata_snapshot->getMetadataVersion());
new_data_part->rows_count = block.rows();
new_data_part->existing_rows_count = block.rows();
new_data_part->partition = std::move(partition);
new_data_part->minmax_idx = std::move(minmax_idx);
new_data_part->is_temp = true;
Expand Down
1 change: 1 addition & 0 deletions src/Storages/MergeTree/MergeTreeSettings.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ struct Settings;
M(UInt64, compact_parts_max_bytes_to_buffer, 128 * 1024 * 1024, "Only available in ClickHouse Cloud", 0) \
M(UInt64, compact_parts_max_granules_to_buffer, 128, "Only available in ClickHouse Cloud", 0) \
M(UInt64, compact_parts_merge_max_bytes_to_prefetch_part, 16 * 1024 * 1024, "Only available in ClickHouse Cloud", 0) \
M(Bool, load_existing_rows_count_for_old_parts, false, "Whether to load existing_rows_count for existing parts. If false, existing_rows_count will be equal to rows_count for existing parts.", 0) \
\
/** Merge settings. */ \
M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. By default has the same value as `index_granularity`.", 0) \
Expand Down
9 changes: 0 additions & 9 deletions src/Storages/MergeTree/MergedBlockOutputStream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,15 +316,6 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis
"It is a bug.", new_part->name);
}

/// For backward compatibility, we don't add existing_count.txt into checksums
if (new_part->supportLightweightDeleteMutate() && new_part->hasLightweightDelete() && new_part->existing_rows_count <= rows_count)
{
auto out = new_part->getDataPartStorage().writeFile(IMergeTreeDataPart::EXISTING_COUNT_FILE_NAME, 4096, write_settings);
writeIntText(new_part->existing_rows_count, *out);
out->preFinalize();
written_files.emplace_back(std::move(out));
}

{
/// Write file with checksums.
auto out = new_part->getDataPartStorage().writeFile("checksums.txt", 4096, write_settings);
Expand Down
9 changes: 0 additions & 9 deletions src/Storages/MergeTree/MutateTask.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -878,15 +878,6 @@ void finalizeMutatedPart(
written_files.push_back(std::move(out_columns));
}

/// Existing parts with lightweight delete will generate existing_count.txt on loading, and that file will not be added into checksums.
/// For compatibility reasons, we don't add existing_count.txt into checksums here.
if (new_data_part->supportLightweightDeleteMutate() && new_data_part->hasLightweightDelete())
{
auto out_existing_count = new_data_part->getDataPartStorage().writeFile(IMergeTreeDataPart::EXISTING_COUNT_FILE_NAME, 4096, context->getWriteSettings());
writeIntText(new_data_part->existing_rows_count, *out_existing_count);
written_files.push_back(std::move(out_existing_count));
}

for (auto & file : written_files)
{
file->finalize();
Expand Down

0 comments on commit 7d8243a

Please sign in to comment.