Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

reproduce and fix the bug in removeSharedRecursive #54430

Merged
merged 4 commits into from Sep 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
68 changes: 54 additions & 14 deletions src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp
Expand Up @@ -7,6 +7,7 @@
#include <Common/Exception.h>

#include <Disks/ObjectStorages/MetadataStorageFromDisk.h>
#include <boost/algorithm/string/join.hpp>

namespace DB
{
Expand Down Expand Up @@ -156,14 +157,13 @@ struct RemoveObjectStorageOperation final : public IDiskObjectStorageOperation

struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperation
{
RemoveBatchRequest remove_paths;
bool keep_all_batch_data;
NameSet file_names_remove_metadata_only;
const RemoveBatchRequest remove_paths;
const bool keep_all_batch_data;
const NameSet file_names_remove_metadata_only;

std::vector<String> paths_removed_with_objects;
std::vector<ObjectsToRemove> objects_to_remove;

bool remove_from_cache = false;

RemoveManyObjectStorageOperation(
IObjectStorage & object_storage_,
IMetadataStorage & metadata_storage_,
Expand Down Expand Up @@ -203,6 +203,7 @@ struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperati
if (unlink_outcome && !keep_all_batch_data && !file_names_remove_metadata_only.contains(fs::path(path).filename()))
{
objects_to_remove.emplace_back(ObjectsToRemove{std::move(objects), std::move(unlink_outcome)});
paths_removed_with_objects.push_back(path);
}
}
catch (const Exception & e)
Expand All @@ -213,6 +214,12 @@ struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperati
|| e.code() == ErrorCodes::CANNOT_READ_ALL_DATA
|| e.code() == ErrorCodes::CANNOT_OPEN_FILE)
{
LOG_DEBUG(
&Poco::Logger::get("RemoveManyObjectStorageOperation"),
"Can't read metadata because of an exception. Just remove it from the filesystem. Path: {}, exception: {}",
metadata_storage.getPath() + path,
e.message());

tx->unlinkFile(path);
}
else
Expand All @@ -238,16 +245,31 @@ struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperati
/// TL;DR Don't pay any attention to 404 status code
if (!remove_from_remote.empty())
object_storage.removeObjectsIfExist(remove_from_remote);

if (!keep_all_batch_data)
{
LOG_DEBUG(
&Poco::Logger::get("RemoveManyObjectStorageOperation"),
"metadata and objects were removed for [{}], "
"only metadata were removed for [{}].",
boost::algorithm::join(paths_removed_with_objects, ", "),
boost::algorithm::join(file_names_remove_metadata_only, ", "));
}
}
};


struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOperation
{
std::string path;
/// path inside disk with metadata
const std::string path;
const bool keep_all_batch_data;
/// paths inside the 'this->path'
const NameSet file_names_remove_metadata_only;

/// map from local_path to its remote objects with hardlinks counter
/// local_path is the path inside 'this->path'
std::unordered_map<std::string, ObjectsToRemove> objects_to_remove_by_path;
bool keep_all_batch_data;
NameSet file_names_remove_metadata_only;

RemoveRecursiveObjectStorageOperation(
IObjectStorage & object_storage_,
Expand All @@ -274,11 +296,16 @@ struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOp
{
try
{
chassert(path_to_remove.starts_with(path));
auto rel_path = String(fs::relative(fs::path(path_to_remove), fs::path(path)));

auto objects_paths = metadata_storage.getStorageObjects(path_to_remove);
auto unlink_outcome = tx->unlinkMetadata(path_to_remove);
if (unlink_outcome)

if (unlink_outcome && !file_names_remove_metadata_only.contains(rel_path))
{
objects_to_remove_by_path[path_to_remove] = ObjectsToRemove{std::move(objects_paths), std::move(unlink_outcome)};
objects_to_remove_by_path[std::move(rel_path)]
= ObjectsToRemove{std::move(objects_paths), std::move(unlink_outcome)};
}
}
catch (const Exception & e)
Expand Down Expand Up @@ -320,25 +347,38 @@ struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOp

void undo() override
{

}

void finalize() override
{
if (!keep_all_batch_data)
{
std::vector<String> total_removed_paths;
total_removed_paths.reserve(objects_to_remove_by_path.size());

StoredObjects remove_from_remote;
for (auto && [local_path, objects_to_remove] : objects_to_remove_by_path)
{
if (!file_names_remove_metadata_only.contains(fs::path(local_path).filename()))
chassert(!file_names_remove_metadata_only.contains(local_path));
if (objects_to_remove.unlink_outcome->num_hardlinks == 0)
{
if (objects_to_remove.unlink_outcome->num_hardlinks == 0)
std::move(objects_to_remove.objects.begin(), objects_to_remove.objects.end(), std::back_inserter(remove_from_remote));
std::move(objects_to_remove.objects.begin(), objects_to_remove.objects.end(), std::back_inserter(remove_from_remote));
total_removed_paths.push_back(local_path);
}
}

/// Read comment inside RemoveObjectStorageOperation class
/// TL;DR Don't pay any attention to 404 status code
object_storage.removeObjectsIfExist(remove_from_remote);

LOG_DEBUG(
&Poco::Logger::get("RemoveRecursiveObjectStorageOperation"),
"Recursively remove path {}: "
"metadata and objects were removed for [{}], "
"only metadata were removed for [{}].",
path,
boost::algorithm::join(total_removed_paths, ", "),
boost::algorithm::join(file_names_remove_metadata_only, ", "));
}
}
};
Expand Down
4 changes: 2 additions & 2 deletions src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
Expand Up @@ -320,7 +320,7 @@ void S3ObjectStorage::removeObjectImpl(const StoredObject & object, bool if_exis

throwIfUnexpectedError(outcome, if_exists);

LOG_TRACE(log, "Object with path {} was removed from S3", object.remote_path);
LOG_DEBUG(log, "Object with path {} was removed from S3", object.remote_path);
}

void S3ObjectStorage::removeObjectsImpl(const StoredObjects & objects, bool if_exists)
Expand Down Expand Up @@ -368,7 +368,7 @@ void S3ObjectStorage::removeObjectsImpl(const StoredObjects & objects, bool if_e

throwIfUnexpectedError(outcome, if_exists);

LOG_TRACE(log, "Objects with paths [{}] were removed from S3", keys);
LOG_DEBUG(log, "Objects with paths [{}] were removed from S3", keys);
}
}
}
Expand Down
9 changes: 9 additions & 0 deletions src/Storages/StorageReplicatedMergeTree.cpp
Expand Up @@ -1593,6 +1593,7 @@ MergeTreeData::DataPartsVector StorageReplicatedMergeTree::checkPartChecksumsAnd
while (true)
{
LOG_DEBUG(log, "Committing part {} to zookeeper", part->name);

Coordination::Requests ops;
NameSet absent_part_paths_on_replicas;

Expand Down Expand Up @@ -8788,6 +8789,14 @@ void StorageReplicatedMergeTree::getLockSharedDataOps(
{
String zookeeper_node = fs::path(zc_zookeeper_path) / id / replica_name;

if (!path_to_set_hardlinked_files.empty() && !hardlinks.empty())
{
LOG_DEBUG(log, "Locking shared node {} with hardlinks from the other shared node {}, "
"hardlinks: [{}]",
zookeeper_node, path_to_set_hardlinked_files,
boost::algorithm::join(hardlinks, ","));
}

getZeroCopyLockNodeCreateOps(
zookeeper, zookeeper_node, requests, zkutil::CreateMode::Persistent,
replace_existing_lock, path_to_set_hardlinked_files, hardlinks);
Expand Down
@@ -0,0 +1,28 @@
<clickhouse>

<storage_configuration>
<disks>
<s3>
<type>s3</type>
<endpoint>http://minio1:9001/root/data/</endpoint>
<access_key_id>minio</access_key_id>
<secret_access_key>minio123</secret_access_key>
<skip_access_check>true</skip_access_check>
</s3>
</disks>
<policies>
<s3>
<volumes>
<main>
<disk>s3</disk>
</main>
</volumes>
</s3>
</policies>
</storage_configuration>

<merge_tree>
<allow_remote_fs_zero_copy_replication>1</allow_remote_fs_zero_copy_replication>
</merge_tree>

</clickhouse>
@@ -0,0 +1,7 @@
<clickhouse>
<profiles>
<default>
<enable_s3_requests_logging>1</enable_s3_requests_logging>
</default>
</profiles>
</clickhouse>