Skip to content

Commit

Permalink
Merge pull request #63600 from ClickHouse/cherrypick/23.8/e2756e02c2e…
Browse files Browse the repository at this point in the history
…d1147fb21f5a2594524df9f9b437e

Cherry pick #63426 to 23.8: Fix backup/restore of projection part in case projection was removed from table metadata, but part still has projection
  • Loading branch information
robot-ch-test-poll3 committed May 21, 2024
2 parents 695d9cf + 89dadce commit 2c5d79f
Show file tree
Hide file tree
Showing 7 changed files with 120 additions and 12 deletions.
15 changes: 12 additions & 3 deletions src/Storages/MergeTree/DataPartStorageOnDiskFull.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,20 @@ UInt32 DataPartStorageOnDiskFull::getRefCount(const String & file_name) const
return volume->getDisk()->getRefCount(fs::path(root_path) / part_dir / file_name);
}

std::string DataPartStorageOnDiskFull::getRemotePath(const std::string & file_name) const
std::string DataPartStorageOnDiskFull::getRemotePath(const std::string & file_name, bool if_exists) const
{
auto objects = volume->getDisk()->getStorageObjects(fs::path(root_path) / part_dir / file_name);
const std::string path = fs::path(root_path) / part_dir / file_name;
auto objects = volume->getDisk()->getStorageObjects(path);

if (objects.empty() && if_exists)
return "";

if (objects.size() != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR, "One file must be mapped to one object on blob storage in MergeTree tables");
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"One file must be mapped to one object on blob storage by path {} in MergeTree tables, have {}.",
path, objects.size());
}

return objects[0].remote_path;
}
Expand Down
2 changes: 1 addition & 1 deletion src/Storages/MergeTree/DataPartStorageOnDiskFull.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class DataPartStorageOnDiskFull final : public DataPartStorageOnDiskBase
Poco::Timestamp getFileLastModified(const String & file_name) const override;
size_t getFileSize(const std::string & file_name) const override;
UInt32 getRefCount(const std::string & file_name) const override;
std::string getRemotePath(const std::string & file_name) const override;
std::string getRemotePath(const std::string & file_name, bool if_exists) const override;
String getUniqueId() const override;

std::unique_ptr<ReadBufferFromFileBase> readFile(
Expand Down
2 changes: 1 addition & 1 deletion src/Storages/MergeTree/IDataPartStorage.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ class IDataPartStorage : public boost::noncopyable
virtual UInt32 getRefCount(const std::string & file_name) const = 0;

/// Get path on remote filesystem from file name on local filesystem.
virtual std::string getRemotePath(const std::string & file_name) const = 0;
virtual std::string getRemotePath(const std::string & file_name, bool if_exists) const = 0;

virtual UInt64 calculateTotalSizeOnDisk() const = 0;

Expand Down
40 changes: 35 additions & 5 deletions src/Storages/MergeTree/MergeTreeData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5308,18 +5308,48 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts(
backup_entries_from_part,
&temp_dirs);

auto projection_parts = part->getProjectionParts();
for (const auto & [projection_name, projection_part] : projection_parts)
auto backup_projection = [&](IDataPartStorage & storage, IMergeTreeDataPart & projection_part)
{
projection_part->getDataPartStorage().backup(
projection_part->checksums,
projection_part->getFileNamesWithoutChecksums(),
storage.backup(
projection_part.checksums,
projection_part.getFileNamesWithoutChecksums(),
fs::path{data_path_in_backup} / part->name,
backup_settings,
read_settings,
make_temporary_hard_links,
backup_entries_from_part,
&temp_dirs);
};

auto projection_parts = part->getProjectionParts();
std::string proj_suffix = ".proj";
std::unordered_set<String> defined_projections;

for (const auto & [projection_name, projection_part] : projection_parts)
{
defined_projections.emplace(projection_name);
backup_projection(projection_part->getDataPartStorage(), *projection_part);
}

/// It is possible that the part has a written but not loaded projection,
/// e.g. it is written to parent part's checksums.txt and exists on disk,
/// but does not exist in table's projections definition.
/// Such a part can appear server was restarted after DROP PROJECTION but before old part was removed.
/// In this case, the old part will load only projections from metadata.
/// See 031145_non_loaded_projection_backup.sh.
for (const auto & [name, _] : part->checksums.files)
{
auto projection_name = fs::path(name).stem().string();
if (endsWith(name, proj_suffix) && !defined_projections.contains(projection_name))
{
auto projection_storage = part->getDataPartStorage().getProjection(projection_name + proj_suffix);
if (projection_storage->exists("checksums.txt"))
{
auto projection_part = const_cast<IMergeTreeDataPart &>(*part).getProjectionPartBuilder(
projection_name, /* is_temp_projection */false).withPartFormatFromDisk().build();
backup_projection(projection_part->getDataPartStorage(), *projection_part);
}
}
}

if (hold_storage_and_part_ptrs)
Expand Down
13 changes: 11 additions & 2 deletions src/Storages/MergeTree/checkDataPart.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,8 +289,17 @@ IMergeTreeDataPart::Checksums checkDataPart(
auto file_name = it->name();
if (!data_part_storage.isDirectory(file_name))
{
auto remote_path = data_part_storage.getRemotePath(file_name);
cache.removePathIfExists(remote_path);
const bool is_projection_part = data_part->isProjectionPart();
auto remote_path = data_part_storage.getRemotePath(file_name, /* if_exists */is_projection_part);
if (remote_path.empty())
{
chassert(is_projection_part);
throw Exception(
ErrorCodes::BROKEN_PROJECTION,
"Remote path for {} does not exist for projection path. Projection {} is broken",
file_name, data_part->name);
}
cache.removePathIfExists(remote_path, FileCache::getCommonUser().user_id);
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
7
Found unexpected projection directories: pp.proj
BACKUP_CREATED
RESTORED
7
Found unexpected projection directories: pp.proj
0
53 changes: 53 additions & 0 deletions tests/queries/0_stateless/03145_non_loaded_projection_backup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/env bash

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

$CLICKHOUSE_CLIENT -nm -q "
drop table if exists tp_1;
create table tp_1 (x Int32, y Int32, projection p (select x, y order by x)) engine = MergeTree order by y partition by intDiv(y, 100);
system stop merges tp_1;
insert into tp_1 select number, number from numbers(3);
set mutations_sync = 2;
alter table tp_1 add projection pp (select x, count() group by x);
insert into tp_1 select number, number from numbers(4);
select count() from tp_1;
-- Here we have a part with written projection pp
alter table tp_1 detach partition '0';
-- Move part to detached
alter table tp_1 clear projection pp;
-- Remove projection from table metadata
alter table tp_1 drop projection pp;
-- Now, we don't load projection pp for attached part, but it is written on disk
alter table tp_1 attach partition '0';
"

$CLICKHOUSE_CLIENT -nm -q "
set send_logs_level='fatal';
check table tp_1 settings check_query_single_value_result = 0;" | grep -o "Found unexpected projection directories: pp.proj"

backup_id="$CLICKHOUSE_TEST_UNIQUE_NAME"
$CLICKHOUSE_CLIENT -q "
backup table tp_1 to Disk('backups', '$backup_id');
" | grep -o "BACKUP_CREATED"

$CLICKHOUSE_CLIENT -nm -q "
set send_logs_level='fatal';
drop table tp_1;
restore table tp_1 from Disk('backups', '$backup_id');
" | grep -o "RESTORED"

$CLICKHOUSE_CLIENT -q "select count() from tp_1;"
$CLICKHOUSE_CLIENT -nm -q "
set send_logs_level='fatal';
check table tp_1 settings check_query_single_value_result = 0;" | grep -o "Found unexpected projection directories: pp.proj"
$CLICKHOUSE_CLIENT -nm -q "
set send_logs_level='fatal';
check table tp_1"
$CLICKHOUSE_CLIENT -nm -q "
set send_logs_level='fatal';
drop table tp_1"

0 comments on commit 2c5d79f

Please sign in to comment.