Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cherry pick #63426 to 23.8: Fix backup/restore of projection part in case projection was removed from table metadata, but part still has projection #63600

15 changes: 12 additions & 3 deletions src/Storages/MergeTree/DataPartStorageOnDiskFull.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,20 @@ UInt32 DataPartStorageOnDiskFull::getRefCount(const String & file_name) const
return volume->getDisk()->getRefCount(fs::path(root_path) / part_dir / file_name);
}

std::string DataPartStorageOnDiskFull::getRemotePath(const std::string & file_name) const
std::string DataPartStorageOnDiskFull::getRemotePath(const std::string & file_name, bool if_exists) const
{
auto objects = volume->getDisk()->getStorageObjects(fs::path(root_path) / part_dir / file_name);
const std::string path = fs::path(root_path) / part_dir / file_name;
auto objects = volume->getDisk()->getStorageObjects(path);

if (objects.empty() && if_exists)
return "";

if (objects.size() != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR, "One file must be mapped to one object on blob storage in MergeTree tables");
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"One file must be mapped to one object on blob storage by path {} in MergeTree tables, have {}.",
path, objects.size());
}

return objects[0].remote_path;
}
Expand Down
2 changes: 1 addition & 1 deletion src/Storages/MergeTree/DataPartStorageOnDiskFull.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class DataPartStorageOnDiskFull final : public DataPartStorageOnDiskBase
Poco::Timestamp getFileLastModified(const String & file_name) const override;
size_t getFileSize(const std::string & file_name) const override;
UInt32 getRefCount(const std::string & file_name) const override;
std::string getRemotePath(const std::string & file_name) const override;
std::string getRemotePath(const std::string & file_name, bool if_exists) const override;
String getUniqueId() const override;

std::unique_ptr<ReadBufferFromFileBase> readFile(
Expand Down
2 changes: 1 addition & 1 deletion src/Storages/MergeTree/IDataPartStorage.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ class IDataPartStorage : public boost::noncopyable
virtual UInt32 getRefCount(const std::string & file_name) const = 0;

/// Get path on remote filesystem from file name on local filesystem.
virtual std::string getRemotePath(const std::string & file_name) const = 0;
virtual std::string getRemotePath(const std::string & file_name, bool if_exists) const = 0;

virtual UInt64 calculateTotalSizeOnDisk() const = 0;

Expand Down
40 changes: 35 additions & 5 deletions src/Storages/MergeTree/MergeTreeData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5308,18 +5308,48 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts(
backup_entries_from_part,
&temp_dirs);

auto projection_parts = part->getProjectionParts();
for (const auto & [projection_name, projection_part] : projection_parts)
auto backup_projection = [&](IDataPartStorage & storage, IMergeTreeDataPart & projection_part)
{
projection_part->getDataPartStorage().backup(
projection_part->checksums,
projection_part->getFileNamesWithoutChecksums(),
storage.backup(
projection_part.checksums,
projection_part.getFileNamesWithoutChecksums(),
fs::path{data_path_in_backup} / part->name,
backup_settings,
read_settings,
make_temporary_hard_links,
backup_entries_from_part,
&temp_dirs);
};

auto projection_parts = part->getProjectionParts();
std::string proj_suffix = ".proj";
std::unordered_set<String> defined_projections;

for (const auto & [projection_name, projection_part] : projection_parts)
{
defined_projections.emplace(projection_name);
backup_projection(projection_part->getDataPartStorage(), *projection_part);
}

/// It is possible that the part has a written but not loaded projection,
/// e.g. it is written to parent part's checksums.txt and exists on disk,
/// but does not exist in table's projections definition.
/// Such a part can appear server was restarted after DROP PROJECTION but before old part was removed.
/// In this case, the old part will load only projections from metadata.
/// See 031145_non_loaded_projection_backup.sh.
for (const auto & [name, _] : part->checksums.files)
{
auto projection_name = fs::path(name).stem().string();
if (endsWith(name, proj_suffix) && !defined_projections.contains(projection_name))
{
auto projection_storage = part->getDataPartStorage().getProjection(projection_name + proj_suffix);
if (projection_storage->exists("checksums.txt"))
{
auto projection_part = const_cast<IMergeTreeDataPart &>(*part).getProjectionPartBuilder(
projection_name, /* is_temp_projection */false).withPartFormatFromDisk().build();
backup_projection(projection_part->getDataPartStorage(), *projection_part);
}
}
}

if (hold_storage_and_part_ptrs)
Expand Down
13 changes: 11 additions & 2 deletions src/Storages/MergeTree/checkDataPart.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,8 +289,17 @@ IMergeTreeDataPart::Checksums checkDataPart(
auto file_name = it->name();
if (!data_part_storage.isDirectory(file_name))
{
auto remote_path = data_part_storage.getRemotePath(file_name);
cache.removePathIfExists(remote_path);
const bool is_projection_part = data_part->isProjectionPart();
auto remote_path = data_part_storage.getRemotePath(file_name, /* if_exists */is_projection_part);
if (remote_path.empty())
{
chassert(is_projection_part);
throw Exception(
ErrorCodes::BROKEN_PROJECTION,
"Remote path for {} does not exist for projection path. Projection {} is broken",
file_name, data_part->name);
}
cache.removePathIfExists(remote_path, FileCache::getCommonUser().user_id);
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
7
Found unexpected projection directories: pp.proj
BACKUP_CREATED
RESTORED
7
Found unexpected projection directories: pp.proj
0
53 changes: 53 additions & 0 deletions tests/queries/0_stateless/03145_non_loaded_projection_backup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/env bash

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

$CLICKHOUSE_CLIENT -nm -q "
drop table if exists tp_1;
create table tp_1 (x Int32, y Int32, projection p (select x, y order by x)) engine = MergeTree order by y partition by intDiv(y, 100);
system stop merges tp_1;
insert into tp_1 select number, number from numbers(3);

set mutations_sync = 2;

alter table tp_1 add projection pp (select x, count() group by x);
insert into tp_1 select number, number from numbers(4);
select count() from tp_1;

-- Here we have a part with written projection pp
alter table tp_1 detach partition '0';
-- Move part to detached
alter table tp_1 clear projection pp;
-- Remove projection from table metadata
alter table tp_1 drop projection pp;
-- Now, we don't load projection pp for attached part, but it is written on disk
alter table tp_1 attach partition '0';
"

$CLICKHOUSE_CLIENT -nm -q "
set send_logs_level='fatal';
check table tp_1 settings check_query_single_value_result = 0;" | grep -o "Found unexpected projection directories: pp.proj"

backup_id="$CLICKHOUSE_TEST_UNIQUE_NAME"
$CLICKHOUSE_CLIENT -q "
backup table tp_1 to Disk('backups', '$backup_id');
" | grep -o "BACKUP_CREATED"

$CLICKHOUSE_CLIENT -nm -q "
set send_logs_level='fatal';
drop table tp_1;
restore table tp_1 from Disk('backups', '$backup_id');
" | grep -o "RESTORED"

$CLICKHOUSE_CLIENT -q "select count() from tp_1;"
$CLICKHOUSE_CLIENT -nm -q "
set send_logs_level='fatal';
check table tp_1 settings check_query_single_value_result = 0;" | grep -o "Found unexpected projection directories: pp.proj"
$CLICKHOUSE_CLIENT -nm -q "
set send_logs_level='fatal';
check table tp_1"
$CLICKHOUSE_CLIENT -nm -q "
set send_logs_level='fatal';
drop table tp_1"